def testAddColumnHeaderRowToCsvFile(self):
    """Copy the ADIT match CSV into the working directory and add a header row.

    The test passes when addColumnHeaderRowToCsvFile returns a falsy value.
    """
    # The source file already contains one header row; this adds another one.
    csvName = "adit_nmr_matched_pdb_bmrb_entry_ids.csv"
    headerColumns = "bmrb_id pdb_id".split()
    # Work on a copy in the current directory, not on the source data file.
    copyfile(os.path.join(cingRoot, matchBmrbPdbDataDir, csvName), csvName)
    failed = addColumnHeaderRowToCsvFile(csvName, headerColumns)
    self.assertFalse(failed)
def prepare(self):
    """Set up the working directory and the three input tables.

    Steps, in order:
      1. Remove matchBmrbPdbDir when self.restartFromScratch is set, then
         recreate it as a copy of cingRoot/matchBmrbPdbDataDir if it does
         not exist, and chdir into it.
      2. Download self.adit_url with wget (log in getAdit.log) and add a
         'bmrb_id pdb_id' header row to self.adit_fn.
      3. Collect BMRB ids from the bmr*_21.str files in bmrbDir, report
         differences with getBmrbEntries(), and write them to bmrb.csv.
      4. Write the NMR PDB ids from getPdbEntries(onlyNmr=True) to
         pdbNmrTable.csv.

    Return True on error. On success the method falls off the end and
    returns None.
    """
    # Above this many differences between disk and DB a warning is logged.
    # The count was 18 + 3 = 21 on April 11, 2011.
    maxInconsistencies = 40

    if self.restartFromScratch:
        rmdir(matchBmrbPdbDir)
    if not os.path.exists(matchBmrbPdbDir):
        csvFileDir = os.path.join(cingRoot, matchBmrbPdbDataDir)
        nTmessage("Recreating data dir %s from SVN %s" % (matchBmrbPdbDir, csvFileDir))
        # copytree creates the destination directory itself.
        copytree(csvFileDir, matchBmrbPdbDir)
    else:
        nTmessage("Reusing existing data dir " + matchBmrbPdbDir)
    os.chdir(matchBmrbPdbDir)

    if 1:  # DEFAULT: 1
        nTmessage("Getting ADIT from: %s" % self.adit_url)
        if os.path.exists(self.adit_fn):
            os.unlink(self.adit_fn)  # prevent buildup of endless copies.
        wgetProgram = ExecuteProgram('wget --no-verbose %s' % self.adit_url,
                                     redirectOutputToFile='getAdit.log')
        exitCode = wgetProgram()
        if exitCode:
            nTerror("Failed to download file %s" % self.adit_url)
            return True
        if not os.path.exists(self.adit_fn):
            # Report the local file name: that is what was checked for.
            nTerror("Failed to find downloaded file %s" % self.adit_fn)
            return True
        columnOrder = 'bmrb_id pdb_id'.split()
        if addColumnHeaderRowToCsvFile(self.adit_fn, columnOrder):
            nTerror("Failed to add header row to " + self.adit_fn)
            return True
        nTmessage("Got the ADIT info")

    if 1:  # DEFAULT: 1
        nTmessage("Getting BMRB file list from : %s" % bmrbDir)
        bmrbIdList = []
        for bmrbFile in findFiles("bmr*_21.str", bmrbDir):
            _directory, basename, _extension = nTpath(bmrbFile)
            bmrbIdList.append(int(basename[3:-3]))  # bmr970_21 -> 970
        bmrbIdList.sort()
        bmrbId2List = getBmrbEntries()
        bmrbIdNTList = NTlist(*bmrbIdList)
        bmrbId2NTList = NTlist(*bmrbId2List)
        bmrbIdNTmissingList = bmrbIdNTList.difference(bmrbId2NTList)
        if bmrbIdNTmissingList:
            nTmessage("Found %d entries on file but not in DB: %s" % (
                len(bmrbIdNTmissingList), str(bmrbIdNTmissingList)))
        bmrbId2NTmissingList = bmrbId2NTList.difference(bmrbIdNTList)
        if bmrbId2NTmissingList:
            nTmessage("Found %d entries in DB but not on file: %s" % (
                len(bmrbId2NTmissingList), str(bmrbId2NTmissingList)))
        if len(bmrbIdNTmissingList + bmrbId2NTmissingList) > maxInconsistencies:
            # The message is built from the threshold so the two cannot drift apart.
            nTwarning("More than %d inconsistencies between BMRB DB and on file."
                      % maxInconsistencies)
        # The leading 'bmrb_id' is the header row for the CSV reader.
        bmrbIdStrList = ['bmrb_id'] + [str(x) for x in bmrbIdList]
        fileName = os.path.join(matchBmrbPdbDir, 'bmrb.csv')
        txt = '\n'.join(bmrbIdStrList)
        if writeTextToFile(fileName, txt):
            return True

    if 1:  # DEFAULT: 1
        dbms2 = DBMS()
        pdbList = getPdbEntries(onlyNmr=True)
        pdbNmrTable = Relation('pdbNmr', dbms2, columnList=['pdb_id'])
        pdbIdColumn = pdbNmrTable.getColumnByIdx(0)  # pylint: disable=W0612
        # presumably extends the table's column in place; verify against Relation
        pdbIdColumn += pdbList
        pdbNmrTable.writeCsvFile('pdbNmrTable.csv')
def testAddColumnHeaderRowToCsvFile(self):
    """Check that a header row can be added to a copy of the ADIT match CSV.

    addColumnHeaderRowToCsvFile is expected to return a falsy value.
    """
    headerColumns = 'bmrb_id pdb_id'.split()
    # The source file already contains one header row; this adds another one.
    csvName = 'adit_nmr_matched_pdb_bmrb_entry_ids.csv'
    sourcePath = os.path.join(cingRoot, matchBmrbPdbDataDir, csvName)
    # Copy into the current directory so the source data file stays untouched.
    copyfile(sourcePath, csvName)
    status = addColumnHeaderRowToCsvFile(csvName, headerColumns)
    self.assertFalse(status)
def prepare(self):
    """Set up the working directory and the three input tables.

    Steps, in order:
      1. Remove matchBmrbPdbDir when self.restartFromScratch is set, then
         recreate it as a copy of cingRoot/matchBmrbPdbDataDir if it does
         not exist, and chdir into it.
      2. Download self.adit_url with wget (log in getAdit.log) and add a
         'bmrb_id pdb_id' header row to self.adit_fn.
      3. Collect BMRB ids from the bmr*_21.str files in bmrbDir, report
         differences with getBmrbEntries(), and write them to bmrb.csv.
      4. Write the NMR PDB ids from getPdbEntries(onlyNmr=True) to
         pdbNmrTable.csv.

    Return True on error. On success the method falls off the end and
    returns None.
    """
    # Above this many differences between disk and DB a warning is logged.
    # The count was 18 + 3 = 21 on April 11, 2011.
    maxInconsistencies = 40

    if self.restartFromScratch:
        rmdir(matchBmrbPdbDir)
    if not os.path.exists(matchBmrbPdbDir):
        csvFileDir = os.path.join(cingRoot, matchBmrbPdbDataDir)
        nTmessage("Recreating data dir %s from SVN %s" % (matchBmrbPdbDir, csvFileDir))
        # copytree creates the destination directory itself.
        copytree(csvFileDir, matchBmrbPdbDir)
    else:
        nTmessage("Reusing existing data dir " + matchBmrbPdbDir)
    os.chdir(matchBmrbPdbDir)

    if 1:  # DEFAULT: 1
        nTmessage("Getting ADIT from: %s" % self.adit_url)
        if os.path.exists(self.adit_fn):
            os.unlink(self.adit_fn)  # prevent buildup of endless copies.
        wgetProgram = ExecuteProgram('wget --no-verbose %s' % self.adit_url,
                                     redirectOutputToFile='getAdit.log')
        exitCode = wgetProgram()
        if exitCode:
            nTerror("Failed to download file %s" % self.adit_url)
            return True
        if not os.path.exists(self.adit_fn):
            # Report the local file name: that is what was checked for.
            nTerror("Failed to find downloaded file %s" % self.adit_fn)
            return True
        columnOrder = 'bmrb_id pdb_id'.split()
        if addColumnHeaderRowToCsvFile(self.adit_fn, columnOrder):
            nTerror("Failed to add header row to " + self.adit_fn)
            return True
        nTmessage("Got the ADIT info")

    if 1:  # DEFAULT: 1
        nTmessage("Getting BMRB file list from : %s" % bmrbDir)
        bmrbIdList = []
        for bmrbFile in findFiles("bmr*_21.str", bmrbDir):
            _directory, basename, _extension = nTpath(bmrbFile)
            bmrbIdList.append(int(basename[3:-3]))  # bmr970_21 -> 970
        bmrbIdList.sort()
        bmrbId2List = getBmrbEntries()
        bmrbIdNTList = NTlist(*bmrbIdList)
        bmrbId2NTList = NTlist(*bmrbId2List)
        bmrbIdNTmissingList = bmrbIdNTList.difference(bmrbId2NTList)
        if bmrbIdNTmissingList:
            nTmessage("Found %d entries on file but not in DB: %s" % (
                len(bmrbIdNTmissingList), str(bmrbIdNTmissingList)))
        bmrbId2NTmissingList = bmrbId2NTList.difference(bmrbIdNTList)
        if bmrbId2NTmissingList:
            nTmessage("Found %d entries in DB but not on file: %s" % (
                len(bmrbId2NTmissingList), str(bmrbId2NTmissingList)))
        if len(bmrbIdNTmissingList + bmrbId2NTmissingList) > maxInconsistencies:
            # The message is built from the threshold so the two cannot drift apart.
            nTwarning("More than %d inconsistencies between BMRB DB and on file."
                      % maxInconsistencies)
        # The leading 'bmrb_id' is the header row for the CSV reader.
        bmrbIdStrList = ['bmrb_id'] + [str(x) for x in bmrbIdList]
        fileName = os.path.join(matchBmrbPdbDir, 'bmrb.csv')
        txt = '\n'.join(bmrbIdStrList)
        if writeTextToFile(fileName, txt):
            return True

    if 1:  # DEFAULT: 1
        dbms2 = DBMS()
        pdbList = getPdbEntries(onlyNmr=True)
        pdbNmrTable = Relation('pdbNmr', dbms2, columnList=['pdb_id'])
        pdbIdColumn = pdbNmrTable.getColumnByIdx(0)  # pylint: disable=W0612
        # presumably extends the table's column in place; verify against Relation
        pdbIdColumn += pdbList
        pdbNmrTable.writeCsvFile('pdbNmrTable.csv')