def get_parfile_from_id(parfile_id, existdb=None, verify_md5=True):
    """Return the path to the parfile that has the given ID number.
        Optionally double check the file's MD5 sum, to make sure
        nothing strange has happened.

        Inputs:
            parfile_id: The ID number of the parfile to get
                a path for.
            existdb: A (optional) existing database connection object.
                (Default: Establish a db connection)
            verify_md5: If True, double check the file's MD5 sum.
                (Default: Perform MD5 check.)

        Output:
            fn: The full file path.

        Raises:
            errors.InconsistentDatabaseError: If the number of DB rows
                with the given parfile_id is not exactly one.
            errors.FileError: If 'verify_md5' is True and the MD5 sum
                computed from the file differs from the one in the DB.
            Whatever 'datafile.verify_file_path' raises for a bad path
                is propagated unchanged.
    """
    notify.print_info("Looking-up raw file with ID=%d" % parfile_id, 2)

    # Use the existing DB connection, or open a new one if None was provided
    db = existdb or database.Database()
    db.connect()
    try:
        select = db.select([db.parfiles.c.filename,
                            db.parfiles.c.filepath,
                            db.parfiles.c.md5sum]).where(
            db.parfiles.c.parfile_id == parfile_id
        )
        result = db.execute(select)
        try:
            rows = result.fetchall()
        finally:
            # Release the result even if fetching the rows fails
            result.close()
    finally:
        if not existdb:
            # Close the DB connection we opened, even if the query failed.
            # A caller-supplied connection is left open for the caller.
            db.close()

    if len(rows) != 1:
        raise errors.InconsistentDatabaseError(
            "Bad number of files (%d) with parfile_id=%d"
            % (len(rows), parfile_id)
        )

    row = rows[0]
    filename = row["filename"]
    filepath = row["filepath"]
    md5sum_from_db = row["md5sum"]

    fullpath = os.path.join(filepath, filename)
    # Make sure the file exists
    datafile.verify_file_path(fullpath)

    if verify_md5:
        notify.print_info(
            "Confirming MD5 sum of %s matches what is stored in DB (%s)"
            % (fullpath, md5sum_from_db),
            2,
        )
        md5sum_file = datafile.get_md5sum(fullpath)
        if md5sum_from_db != md5sum_file:
            raise errors.FileError(
                "md5sum check of %s failed! MD5 from "
                "DB (%s) != MD5 from file (%s)"
                % (fullpath, md5sum_from_db, md5sum_file)
            )
    return fullpath
def get_parfile_id(parfile, existdb=None):
    """Given a parfile path find its parfile_id number.

        The DB is searched for rows whose md5sum OR filename matches
        the given file; an ID is only returned when exactly one row
        matches and it agrees on both.

        Inputs:
            parfile: the path to a parfile.
            existdb: A (optional) existing database connection object.
                (Default: Establish a db connection)

        Output:
            parfile_id: the corresponding parfile_id value.

        Raises:
            errors.ToasterError: If no row matches the file.
            errors.FileError: If the single matching row agrees on
                only one of md5sum / filename.
            errors.InconsistentDatabaseError: If multiple rows match,
                or the single matching row agrees on neither field.
    """
    notify.print_info(
        "Getting parfile ID for %s using filename and md5sum" % parfile, 2
    )
    # Inspect the file before touching the DB, so that an unreadable
    # file cannot leave a freshly opened connection dangling.
    fn = os.path.basename(os.path.abspath(parfile))
    md5sum = datafile.get_md5sum(parfile)

    # Use the existing DB connection, or open a new one if None was provided
    db = existdb or database.Database()
    db.connect()
    try:
        select = db.select([db.parfiles.c.md5sum,
                            db.parfiles.c.filename,
                            db.parfiles.c.parfile_id]).where(
            (db.parfiles.c.md5sum == md5sum)
            | (db.parfiles.c.filename == fn)
        )
        result = db.execute(select)
        try:
            rows = result.fetchall()
        finally:
            # Release the result even if fetching the rows fails
            result.close()
    finally:
        if not existdb:
            # Close the DB connection we opened, even if the query failed.
            # A caller-supplied connection is left open for the caller.
            db.close()

    if len(rows) == 0:
        raise errors.ToasterError(
            "Input parfile (%s) does not appear "
            "to be registered in the DB! "
            "Use 'load_parfile.py' to load "
            "it into the DB." % parfile
        )
    if len(rows) > 1:
        raise errors.InconsistentDatabaseError(
            "Multiple (%s) parfiles "
            "match the given file name or md5sum!" % len(rows)
        )

    row = rows[0]
    same_md5 = row["md5sum"] == md5sum
    same_name = row["filename"] == fn
    if same_md5 and same_name:
        return row["parfile_id"]
    if same_md5:
        raise errors.FileError(
            "A parfile (parfile_id=%d) with "
            "this md5sum, but a different filename "
            "exists in the DB." % row["parfile_id"]
        )
    if same_name:
        raise errors.FileError(
            "A parfile (parfile_id=%d) with "
            "this filename, but a different md5sum "
            "exists in the DB." % row["parfile_id"]
        )
    # The query selects the 'filename' column; there is no 'fn' column,
    # so the row must be indexed by "filename" for this error to be raised.
    raise errors.InconsistentDatabaseError(
        "A parfile (parfile_id=%d) "
        "matches our query, but neither its md5sum (%s), "
        "nor its filename (%s) appears to match! "
        "This should never happen!"
        % (row["parfile_id"], row["md5sum"], row["filename"])
    )