def test_guess_type():
    """Check that File.guess_type() maps a '.txt' file to the "TXT" type."""
    logger = logging.getLogger('test_guess_type')
    logger.debug('Testing function for guess the type of a file')
    file_obj = File(name=f"{os.getenv('DATADIR')}/test.txt")
    assert file_obj.guess_type() == "TXT"
def test_check_if_exists():
    """Verify that check_if_exists() reports True for a file present on disk."""
    logger = logging.getLogger('test_check_if_exists')
    logger.debug('Testing function for checking if a file exists')
    file_obj = File(
        name=f"{os.getenv('DATADIR')}/test.txt",
        type="TYPE_F",
    )
    assert file_obj.check_if_exists() is True
def test_guess_type_default():
    """Check that guess_type() falls back to "MISC" for an unknown extension."""
    log = logging.getLogger('test_guess_type_default')
    # Bug fix: the original implicit string concatenation ran 'is' and 'not'
    # together, logging "...the extension isnot recognised".
    log.debug('Testing function to check when a default '
              'type is assigned when the extension is '
              'not recognised')
    new_fname = f"{os.getenv('DATADIR')}/test.2020.pippo"
    f = File(name=new_fname)
    assert f.guess_type() == "MISC"
def test_guess_type1(rand_file):
    """Check guess_type() on a file renamed to a more complex basename."""
    logger = logging.getLogger('test_guess_type1')
    logger.debug('Testing function for guess the type of a complex filename')
    new_fname = f"{os.getenv('DATADIR')}/test_arch.2020.txt"
    # give the file a more complex basename before guessing its type
    os.rename(f"{os.getenv('DATADIR')}/test_arch.txt", new_fname)
    file_obj = File(name=new_fname)
    # clean up the test file; guess_type() presumably works on the name alone
    os.remove(new_fname)
    assert file_obj.guess_type() == "TXT"
def test_f_wo_md5():
    """A File built without an md5sum should compute it from the file contents."""
    logger = logging.getLogger('test_f_wo_md5')
    logger.debug('Instantiation without md5sum')
    file_obj = File(
        name=f"{os.getenv('DATADIR')}/test.txt",
        type="TYPE_F",
    )
    assert file_obj.md5 == "0b1578b3dbfca89caa03a88949d68fa4"
def test_f_wo_size():
    """A File built without a size should derive it from the file on disk."""
    logger = logging.getLogger('test_f_wo_size')
    logger.debug('Instantiation without file size')
    file_obj = File(
        name=f"{os.getenv('DATADIR')}/test.txt",
        type="TYPE_F",
    )
    assert file_obj.size == 8
def test_f_wo_creation_date():
    """A File built without a creation date should default to "now".

    Bug fix: the original compared formatted timestamp strings for exact
    equality, which intermittently fails when the File is instantiated just
    before a second boundary. Parse the timestamp and allow a small delta
    instead.
    """
    log = logging.getLogger('test_f_wo_creation_date')
    log.debug('Instantiation without creation date')
    f = File(
        name=f"{os.getenv('DATADIR')}/test.txt",
        type="TYPE_F",
    )
    created = datetime.datetime.strptime(f.created, '%Y-%m-%d %H:%M:%S')
    # 2-second tolerance covers the instantiation-to-assertion gap
    assert abs((datetime.datetime.now() - created).total_seconds()) <= 2
def test_f_w_md5():
    """When an md5sum is supplied explicitly, the File keeps it verbatim."""
    logger = logging.getLogger('test_f_w_md5')
    logger.debug('Instantiation with md5sum')
    file_obj = File(
        name=f"{os.getenv('DATADIR')}/test.txt",
        type="TEST_F",
        md5="369ccfaf31586363bd645d48b72c09c4",
    )
    assert file_obj.md5 == "369ccfaf31586363bd645d48b72c09c4"
def print_changelog(self, ifile):
    """
    Function that adds an entry to the CHANGELOG report file.

    Builds a dated summary of the modified file types held in this object's
    set/dict attributes and prepends it to the existing CHANGELOG content.

    Parameters
    ----------
    ifile : str
        path to CHANGELOG file that will be updated.

    Returns
    -------
    None
    """
    now_str = self.dtime.strftime('%Y-%m-%d')
    now_str1 = self.dtime.strftime('%Y%m%d')
    # the new entry starts with the date header
    lines_to_add = now_str + "\n\n"
    # NOTE(review): iterates every attribute of the instance; only set- or
    # dict-valued, non-empty attributes contribute a section. 'state' is the
    # attribute name (e.g. new/withdrawn/... — confirm against the class).
    for state, value in self.__dict__.items():
        size = 0
        if type(value) is set:
            size = len(value)
        elif type(value) is dict:
            size = len(value.keys())
        if size == 0:
            continue
        types = []
        for p in value:
            # create File object to get its type
            fObj = File(name=p)
            types.append(fObj.guess_type())
        # remove duplicates from list
        types = list(set(types))
        types = [s.lower() for s in types]
        # get the changelog_details dir from config
        dirname = CONFIG.get('ctree', 'chlog_details_dir')
        lines_to_add += "Modification to: {0}\n\n".format(",".join(types))
        lines_to_add += "Details can be found in\n" \
                        "{0}/changelog_details_{1}_{2}\n\n".format(dirname,
                                                                   now_str1,
                                                                   state)
    # prepend the new entry: read the current content, rewind, and write the
    # new lines followed by the original text
    with open(ifile, 'r+') as f:
        content = f.read()
        f.seek(0, 0)
        f.write(lines_to_add.rstrip('\r\n') + '\n\n' + content)
def load_file(rand_file, db_obj):
    """
    Fixture to load a file to the RESEQTRACK DB.

    Returns the path of the loaded file.
    """
    print('Running fixture to load test file in the DB')
    file_obj = File(name=rand_file.name, type="TYPE_F")
    db_obj.load_file(file_obj, dry=False)
    print('Running fixture to load test file in the DB. DONE...')
    return rand_file.name
def test_load_f(db_obj, del_obj):
    """Load a single file entry into the DB via 'load_file'."""
    logger = logging.getLogger('test_load_f')
    logger.debug("Testing 'load_file' function to load file entry in DB")
    file_obj = File(name=os.getenv('DATADIR') + "/test.txt", type="TYPE_F")
    db_obj.load_file(file_obj, dry=False)
    # register for clean-up
    del_obj.append(file_obj)
def test_update_f(db_obj, del_obj):
    """Update the 'name' attribute of an existing entry in the 'file' table.

    Bug fix: the original built the update target and replacement paths
    without the '/' separator (``DATADIR + "test.txt"``), so the ``name``
    passed to 'update_file' could never match the entry that was loaded as
    ``DATADIR + "/test.txt"``, and the update silently targeted nothing.
    """
    log = logging.getLogger('test_update_f')
    log.debug("Testing 'update_file' function to update an entry "
              "in the 'file' table of the DB")
    # First, load file entry in the database
    f = File(name=os.getenv('DATADIR') + "/test.txt", type="TYPE_F")
    db_obj.load_file(f, dry=False)
    # Now, modify the file path for the entry in the 'file' table
    db_obj.update_file(attr_name='name',
                       value=os.getenv('DATADIR') + "/test1.txt",
                       name=os.getenv('DATADIR') + "/test.txt",
                       dry=False)
    # register the renamed entry for clean-up
    f1 = File(name=os.getenv('DATADIR') + "/test1.txt", type="TYPE_F")
    del_obj.append(f1)
def load_file_list(rand_filelst, db_obj):
    """
    Fixture to load a list of files to the RESEQTRACK DB.

    Reads one path per line from 'rand_filelst' and loads each as TYPE_F.
    """
    print('Running fixture to load a list of test files in the DB')
    with open(rand_filelst) as flist:
        for line in flist:
            file_obj = File(name=line.rstrip("\n"), type="TYPE_F")
            db_obj.load_file(file_obj, dry=False)
    print('Running fixture to load a list of test files in the DB. DONE...')
    return rand_filelst
def test_delete_f(db_obj):
    """Load a file entry and then remove it via 'delete_file'."""
    logger = logging.getLogger('test_delete_f')
    logger.debug(
        "Testing 'delete_file' function to delete a file entry in the DB")
    file_obj = File(name=os.getenv('DATADIR') + "/test.txt", type="TYPE_F")
    # the entry must exist in the DB before it can be deleted
    db_obj.load_file(file_obj, dry=False)
    db_obj.delete_file(file_obj, dry=False)
def push_file(rand_file, db_obj, conn_api):
    """
    Fixture to push a file to FIRE. It will also load the file into the
    RESEQTRACK DB first.

    Returns the absolute path of the pushed file.
    """
    print('Running fixture to push a test file to FIRE')
    file_obj = File(name=rand_file.name, type="TYPE_F")
    db_obj.load_file(file_obj, dry=False)
    conn_api.push_object(fileO=file_obj, fire_path="test_arch.txt", dry=False)
    print('Running fixture to push a test file to FIRE. DONE...')
    return os.path.abspath(rand_file.name)
def loaded_obj():
    """Push a File to FIRE (unpublished) and return the resulting object."""
    logger = logging.getLogger('loaded_obj')
    logger.debug('push FIRE object')
    # build the File object to be archived
    file_obj = File(name=os.getenv('DATADIR') + "/test.txt",
                    type="TEST_F",
                    md5sum="f5aa4f4f1380b71acc56750e9f8ff825")
    return api.push_object(fileO=file_obj,
                           dry=False,
                           fire_path="test_dir/test.txt",
                           publish=False)
def push_ctree(self, dry=True): """ Function to push self.staging_tree to the archive. This function will follow these steps: 1) Update the metadata for current.tree entry in the DB 2) Create a backup for self.prod_tree 3) Delete self.prod_tree 4) Push self.staging_tree to the archive Parameters ---------- dry: bool, default=True Perform a dry run. Returns ------- fire_path : str Fire path of the pushed current.tree. """ # updating metadata for existing staging_tree file in the DB staging_fobj = File(name=self.staging_tree) self.db.update_file('md5', staging_fobj.md5, self.prod_tree, dry=dry) self.db.update_file('size', staging_fobj.size, self.prod_tree, dry=dry) # create a backup for self.prod_tree basename = os.path.basename(self.prod_tree) fire_path = f"{CONFIG.get('ctree', 'ctree_fpath')}/{basename}" prod_file = self.api.retrieve_object( firePath=fire_path, outfile=f"{CONFIG.get('ctree', 'backup')}/{basename}." f"{self.dtime}.backup") if prod_file is None: raise Exception(f"No current.tree file retrieved from the archive") # delete self.prod_tree from archive fire_obj = self.api.fetch_object(firePath=fire_path) if fire_obj is None: raise Exception(f"No current.tree file retrieved from the archive") self.api.delete_object(fireOid=fire_obj.fireOid, dry=dry) # push self.staging_tree to archive basename = os.path.basename(self.staging_tree) fire_path = f"{CONFIG.get('ctree', 'ctree_fpath')}/{basename}" self.api.push_object(fileO=staging_fobj, fire_path=fire_path, dry=dry) return fire_path
def fetch_file(self, path=None, basename=None):
    """
    Function to fetch a certain entry from the table 'file' in self.dbname.

    Parameters
    ----------
    path : str, optional
        Path of file to be retrieved.
    basename : str, optional
        Basename of file to be retrieved.

    Returns
    -------
    f : file.file.File or None
        Object retrieved from DB; None if no file was retrieved or a DB
        error occurred (the transaction is rolled back).

    Raises
    ------
    ValueError
        If neither 'path' nor 'basename' is provided.
    """
    # Bug fixes vs. original: 'cursor.close()' and 'conn.commit()' sat after
    # the in-loop 'return' and were unreachable; the cursor also leaked on
    # the no-result path, and calling with no arguments executed fetchall()
    # on a cursor that never ran a query. The duplicated "using using" in
    # the debug message is fixed too.
    if path is None and basename is None:
        raise ValueError("Provide either 'path' or 'basename'")
    cursor = self.conn.cursor(pymysql.cursors.DictCursor)
    try:
        if path is not None:
            db_logger.debug(f"Fetching file with path: {path}")
            abs_path = os.path.abspath(path)
            cursor.execute("SELECT * FROM file WHERE name = %s", [abs_path])
        else:
            db_logger.debug(f"Fetching file with basename: {basename}")
            cursor.execute("SELECT * FROM file WHERE name like %s",
                           ['%' + basename])
        result_set = cursor.fetchall()
        if not result_set:
            db_logger.debug(
                f"No file retrieved from DB using path:{path}")
            return None
        # as in the original, only the first matching row is returned
        return File(**result_set[0])
    except pymysql.Error:
        db_logger.error("Exception occurred", exc_info=True)
        # Rollback in case there is any error
        self.conn.rollback()
        return None
    finally:
        cursor.close()
def test_push_object(del_obj):
    """
    This test will fail if an Exception is raised

    The push_object will be invoked without fire_path and will not publish
    """
    logger = logging.getLogger('test_push_object')
    logger.debug('Pushing (upload) a file.file.File object to FIRE')
    # creating File object
    file_obj = File(name=os.getenv('DATADIR') + "/test.txt",
                    type="TEST_F",
                    md5sum="f5aa4f4f1380b71acc56750e9f8ff825")
    pushed = api.push_object(fileO=file_obj, dry=False, publish=False)
    del_obj.append(pushed.fireOid)
def test_fetch_f_exists_w_path(db_obj, del_obj):
    """Fetch an existing file from the DB using its path."""
    log = logging.getLogger('test_fetch_f_exists_w_path')
    # Bug fix: the original implicit string concatenation logged
    # "...an existingfile..." — a space was missing between the parts.
    log.debug('Testing \'fetch_file\' function to fetch an existing '
              'file from the DB using its path')
    rel_path = os.getenv('DATADIR') + "/test.txt"
    # First, load file entry in the database
    f = File(name=rel_path, type="TYPE_F")
    db_obj.load_file(f, dry=False)
    # path provided here points to the loaded file
    rf = db_obj.fetch_file(path=rel_path)
    del_obj.append(rf)
    # fetch_file stores absolute paths, so compare against the resolved one
    assert rf.name == os.path.abspath(rel_path)
def push_chlog_details(self, pathlist, db, api, dry=True):
    """
    Push the changelog_details_* files to the archive.

    For each file this will:
    1) Load the new changelog_details_* file into the DB
    2) Push it to the archive
    3) Update its DB 'name' to the FTP-mounted path

    Parameters
    ----------
    pathlist : list of str
        Paths of the changelog_details_* files (resulting from running
        self.print_chlog_details).
    db : DB connection object.
    api : API connection object.
    dry : bool, default=True
        Perform a dry run.

    Returns
    -------
    pushed_files : list of str
        Fire paths of the pushed changelog_details_* files.
    """
    ce_logger.info("Pushing changelog_details_* files to the archive")
    pushed_files = []
    for chlog_path in pathlist:
        basename = os.path.basename(chlog_path)
        file_obj = File(name=chlog_path, type="CHANGELOG")
        details_dir = CONFIG.get('ctree', 'chlog_details_dir')
        fire_path = f"{details_dir}/{basename}"
        new_path = f"{CONFIG.get('ftp', 'ftp_mount')}{details_dir}/{basename}"
        db.load_file(file_obj, dry=dry)
        api.push_object(file_obj, dry=dry, publish=True, fire_path=fire_path)
        pushed_files.append(fire_path)
        # record the FTP-mounted location as the file's name in the DB
        db.update_file('name', new_path, file_obj.name, dry=dry)
    return pushed_files
def test_push_object_w_fpath(del_obj):
    """
    This test will fail if an Exception is raised
    """
    logger = logging.getLogger('test_push_object_w_fpath')
    logger.debug('Pushing (upload) a file.file.File object to FIRE adding a '
                 'virtual FIRE path')
    # creating File object
    file_obj = File(name=os.getenv('DATADIR') + "/test.txt",
                    type="TEST_F",
                    md5sum="f5aa4f4f1380b71acc56750e9f8ff825")
    pushed = api.push_object(fileO=file_obj,
                             dry=False,
                             publish=False,
                             fire_path="test_dir/test.txt")
    del_obj.append(pushed.fireOid)
def test_push_comp_object_w_fpath(del_obj):
    """
    Test used to check if 'push_object' works also with a *.gz compressed
    file
    """
    logger = logging.getLogger('test_push_comp_object_w_fpath')
    logger.debug(
        'Pushing (upload) a compressed file.file.File object to FIRE adding a '
        'virtual FIRE path')
    # creating File object
    file_obj = File(name=os.getenv('DATADIR') + "/test.txt.gz",
                    type="TEST_F",
                    md5sum="a32c5f11391b49b0788def64d28f8807")
    pushed = api.push_object(fileO=file_obj,
                             dry=False,
                             fire_path="test_dir/test.txt.gz")
    del_obj.append(pushed.fireOid)
def push_file_list(rand_filelst, db_obj, conn_api):
    """
    Fixture to push a list of files to FIRE, loading each into the
    RESEQTRACK DB first.

    Returns
    -------
    File with the list of file paths that have been pushed
    """
    print('Running fixture to push a list of test files to FIRE')
    with open(rand_filelst) as flist:
        for line in flist:
            path = line.rstrip("\n")
            file_obj = File(name=path, type="TYPE_F")
            db_obj.load_file(file_obj, dry=False)
            conn_api.push_object(fileO=file_obj,
                                 fire_path=os.path.basename(path),
                                 dry=False)
    print('Running fixture to push a list of test files to FIRE. DONE...')
    return rand_filelst
# De-archive a single FIRE object: verify the file exists in the DB and in
# FIRE, download it, optionally md5-check it, then delete it from FIRE and
# from the RESEQTRACK DB.
# NOTE(review): script-body fragment — 'path', 'args', 'db', 'api',
# 'settingsO', 'logger' and 'str2bool' are defined outside this view.
abs_path = os.path.abspath(path)
# FIRE path is the absolute path with the FTP mount prefix stripped off
fire_path = re.sub(settingsO.get('ftp', 'ftp_mount') + "/", '', abs_path)
dearch_f = db.fetch_file(path=abs_path)
assert dearch_f is not None, f"File entry with path {abs_path} does not exist in the DB. " \
                             f"Can't proceed"
# check if 'path' exists in FIRE
dearch_fobj = api.fetch_object(firePath=fire_path)
assert dearch_fobj is not None, f"File entry with firePath {fire_path} is not archived in FIRE. " \
                                f"Can't proceed"
# download the file
# construct path to store the dearchived file
logger.info(f"Downloading file to be dearchived: {abs_path}")
basename = os.path.basename(abs_path)
downloaded_path = os.path.join(args.directory, basename)
api.retrieve_object(fireOid=dearch_fobj.fireOid, outfile=downloaded_path)
logger.info(f"Download completed!")
if str2bool(args.md5check) is True:
    logger.info("Checking if the md5sum of the retrieved and archived"
                " object matches")
    # File computes the md5 of the downloaded copy for comparison
    f = File(name=downloaded_path)
    assert f.md5 == dearch_fobj.objectMd5, "downloaded file and archived object md5sums do" \
                                           " not match. Can't continue"
    logger.info("md5sums match. Will continue dearchiving FIRE object")
# delete FIRE object
api.delete_object(fireOid=dearch_fobj.fireOid, dry=str2bool(args.dry))
# finally, delete de-archived file from RESEQTRACK DB
db.delete_file(dearch_f, dry=str2bool(args.dry))
# Replace an already-archived file with a new version from the staging area:
# delete the old FIRE object, then refresh the md5/size metadata of the DB
# entry (named 'ftp_path') using the staging copy.
# NOTE(review): script-body fragment — 'f', 'fire_path', 'ftp_path', 'args',
# 'db', 'api', 'logger' and 'str2bool' are defined outside this view.
logger.info(
    f"It seems that file: {f} is already archived and --update_existing is True"
)
logger.info(f"Archived file will be updated with new file")
# First, retrieve the FIRE object
dearch_fobj = api.fetch_object(firePath=fire_path)
assert dearch_fobj is not None, f"Object with FIRE path: {fire_path} was not retrieved"
# delete the FIRE object
api.delete_object(fireOid=dearch_fobj.fireOid, dry=str2bool(args.dry))
# Create File object pointing to the file placed in the staging area
f_in_staging = File(name=f)
# now update the metadata for f_inftp_o
# NOTE(review): the assertion messages below contain pre-existing typos
# ("'File'" + "table" run together, "the the DB"); left untouched here as
# they are runtime strings.
status_code = db.update_file(attr_name='md5',
                             value=f_in_staging.md5,
                             name=ftp_path,
                             dry=str2bool(args.dry))
assert status_code == 0, "Something went wrong when updating the 'md5' field of the entry in the 'File'" \
                         "table of the the DB"
status_code = db.update_file(attr_name='size',
                             value=f_in_staging.size,
                             name=ftp_path,
                             dry=str2bool(args.dry))
assert status_code == 0, "Something went wrong when updating the 'size' field of the entry in the 'File'" \
                         "table of the the DB"
def update_CHANGELOG(self, chlog_p, db, api, dry=True):
    """
    Function to push the updated CHANGELOG file to FIRE.

    This function will do the following:
    1) Update the CHANGELOG file metadata in the DB
    2) Create a backup copy of the CHANGELOG file before being updated
    3) Delete the old CHANGELOG file from FIRE
    4) Push the new (updated) CHANGELOG file to FIRE

    Parameters
    ----------
    chlog_p : str
        path to updated CHANGELOG file that will be pushed to FIRE.
    db : DB connection object.
    api : API connection object.
    dry : bool, default=True
        Perform a dry run.

    Returns
    -------
    str
        Fire path of the updated CHANGELOG file.

    Raises
    ------
    Exception
        If the existing CHANGELOG cannot be retrieved or found in the
        archive.
    """
    # timestamp used to name the backup copy
    dtstr = self.dtime.now().strftime('%Y_%m_%dT%H%M%S')
    # update the CHANGELOG metadata in the DB
    chlog_obj = File(name=chlog_p)
    chlog_obj.md5 = chlog_obj.calc_md5()
    chlog_obj.size = os.path.getsize(chlog_obj.name)
    # get the current path to CHANGELOG so it is updated in DB
    chglog_p = f"{CONFIG.get('ftp', 'ftp_mount')}{CONFIG.get('ctree', 'chlog_fpath')}"
    db.update_file('md5', chlog_obj.md5, chglog_p, dry=dry)
    db.update_file('size', chlog_obj.size, chglog_p, dry=dry)
    ce_logger.info("Pushing updated CHANGELOG file to API")
    # to push the updated CHANGELOG you need to delete it from FIRE first;
    # retrieve_object doubles as the backup step
    old_file = api.retrieve_object(
        firePath=CONFIG.get('ctree', 'chlog_fpath'),
        outfile=f"{CONFIG.get('ctree','backup')}/{os.path.basename(chlog_p)}."
                f"{dtstr}.backup")
    if old_file is None:
        raise Exception(f"No CHANGELOG file retrieved from the archive")
    fire_obj = api.fetch_object(
        firePath=CONFIG.get('ctree', 'chlog_fpath'))
    if fire_obj is None:
        raise Exception(f"No CHANGELOG file retrieved from the archive")
    ce_logger.info("Delete CHANGELOG to be updated from the archive")
    api.delete_object(fireOid=fire_obj.fireOid, dry=dry)
    ce_logger.info("Push updated CHANGELOG file to the archive")
    api.push_object(chlog_obj,
                    dry=dry,
                    fire_path=CONFIG.get('ctree', 'chlog_fpath'))
    return f"{CONFIG.get('ctree','chlog_fpath')}"
# NOTE(review): script-body fragment; the opening strings below continue a
# 'raise Exception(' call that starts outside this view, and the final 'if'
# has its body outside this view as well.
"$DBPWD undefined. You need either to pass the password of the MYSQL "
"server containing the RESEQTRACK database using the --pwd option or set a $DBPWD environment "
"variable before running this script!")

if not os.path.isfile(args.settings):
    raise Exception(
        f"Config file provided using --settings option({args.settings}) not found!"
    )

# Class to connect with Reseqtrack DB
db = DB(pwd=pwd, dbname=dbname)

if args.file:
    logger.info('File provided using -f, --file option')
    f = File(name=args.file)
    db.delete_file(f, dry=str2bool(args.dry))
elif args.list_file:
    logger.info('File with paths provided using -l, --list_file option')
    for path in args.list_file:
        path = path.rstrip("\n")
        # any whitespace in the line splits it into >1 column: malformed input
        cols = re.split(' +', path)
        if len(cols) > 1:
            raise Exception(
                "Incorrect number of columns in file used for --list_file. "
                "Check format. File should have a different path per line with no whitespaces "
                "in the path.")
        rf = db.fetch_file(path=path)
        # NOTE(review): the body of this 'if' continues outside this view
        if rf is None:
raise Exception( "$DBPWD undefined. You need either to pass the password of the MYSQL " "server containing the RESEQTRACK database using the --pwd option or set a $DBPWD environment " "variable before running this script!") # Class to connect with Reseqtrack DB db = DB(pwd=pwd, dbname=dbname) # list with paths to be loaded files = [] if args.file: logger.info('File provided using -f, --file option') if args.type is not None: logger.debug('Type provided using -t, --type option') f = File(name=args.file, type=args.type) else: logger.debug('No file type provided using -t, --type option') logger.debug('File type will be guessed from its file extension') f = File(name=args.file, settingsf=args.settings) ftype = f.guess_type() f.type = ftype files.append(f) elif args.list_file: logger.info('File with paths provided using -l, --list_file option') for path in args.list_file: path = path.rstrip("\n") cols = re.split(' +', path) if len(cols) > 1: