Exemplo n.º 1
0
def test_guess_type():
    """Check that a ``.txt`` file located in $DATADIR is typed as ``TXT``."""
    logging.getLogger('test_guess_type').debug(
        'Testing function for guess the type of a file')

    data_dir = os.getenv('DATADIR')
    txt_file = File(name=f"{data_dir}/test.txt")
    guessed = txt_file.guess_type()

    assert guessed == "TXT"
Exemplo n.º 2
0
def test_check_if_exists():
    """Check that ``check_if_exists`` returns True for $DATADIR/test.txt."""
    logging.getLogger('test_check_if_exists').debug(
        'Testing function for checking if a file exists')

    target = f"{os.getenv('DATADIR')}/test.txt"
    file_obj = File(name=target, type="TYPE_F")
    found = file_obj.check_if_exists()

    assert found is True
Exemplo n.º 3
0
def test_guess_type_default():
    """Check that an unrecognised extension falls back to type ``MISC``."""
    log = logging.getLogger('test_guess_type_default')
    # Adjacent literals are concatenated verbatim, so every fragment has to
    # carry its own trailing space (the original logged "isnot recognised").
    log.debug('Testing function to check when a default '
              'type is assigned when the extension is '
              'not recognised')

    new_fname = f"{os.getenv('DATADIR')}/test.2020.pippo"
    f = File(name=new_fname)

    assert f.guess_type() == "MISC"
Exemplo n.º 4
0
def test_guess_type1(rand_file):
    """Check type guessing for a basename that contains an extra dot.

    ``test_arch.txt`` in $DATADIR is renamed to ``test_arch.2020.txt``,
    a ``File`` is built from the new name, and the renamed file is removed
    again before the guessed type is asserted.

    Parameters
    ----------
    rand_file : fixture
        Not used directly in the body; presumably it creates
        ``test_arch.txt`` -- confirm against the fixture definition.
    """
    log = logging.getLogger('test_guess_type1')
    log.debug('Testing function for guess the type of a complex filename')

    data_dir = os.getenv('DATADIR')
    new_fname = f"{data_dir}/test_arch.2020.txt"
    # change the basename to something more complex
    os.rename(f"{data_dir}/test_arch.txt", new_fname)
    try:
        f = File(name=new_fname)
    finally:
        # Remove the renamed file even when File() raises, so a failing
        # constructor does not leave a stray file behind in $DATADIR.
        os.remove(new_fname)

    assert f.guess_type() == "TXT"
Exemplo n.º 5
0
def test_f_wo_md5():
    """Check that ``md5`` is populated when no md5 is passed to ``File``."""
    logging.getLogger('test_f_wo_md5').debug('Instantiation without md5sum')

    expected_md5 = "0b1578b3dbfca89caa03a88949d68fa4"
    file_obj = File(name=f"{os.getenv('DATADIR')}/test.txt", type="TYPE_F")

    assert file_obj.md5 == expected_md5
Exemplo n.º 6
0
def test_f_wo_size():
    """Check that ``size`` is populated when no size is passed to ``File``."""
    logging.getLogger('test_f_wo_size').debug(
        'Instantiation without file size')

    expected_size = 8
    file_obj = File(name=f"{os.getenv('DATADIR')}/test.txt", type="TYPE_F")

    assert file_obj.size == expected_size
Exemplo n.º 7
0
def test_f_wo_creation_date():
    """Check that ``created`` defaults to the instantiation time.

    The timestamp is bracketed between two readings of the clock instead of
    being compared with a single later reading: an exact match fails
    whenever the second rolls over between the constructor and the assert.
    """
    log = logging.getLogger('test_f_wo_creation_date')
    log.debug('Instantiation without creation date')

    # Zero-padded 'YYYY-MM-DD HH:MM:SS' strings sort chronologically, so
    # plain string comparison is enough for the bracket check.
    fmt = '%Y-%m-%d %H:%M:%S'
    before = datetime.datetime.now().strftime(fmt)
    f = File(
        name=f"{os.getenv('DATADIR')}/test.txt",
        type="TYPE_F"
    )
    after = datetime.datetime.now().strftime(fmt)

    assert before <= f.created <= after
Exemplo n.º 8
0
def test_f_w_md5():
    """Check that an md5 passed to ``File`` is kept exactly as given."""
    logging.getLogger('test_f_w_md5').debug('Instantiation with md5sum')

    given_md5 = "369ccfaf31586363bd645d48b72c09c4"
    file_obj = File(name=f"{os.getenv('DATADIR')}/test.txt",
                    type="TEST_F",
                    md5=given_md5)

    assert file_obj.md5 == given_md5
Exemplo n.º 9
0
    def print_changelog(self, ifile):
        """
        Function that prepends an entry to the CHANGELOG report
        file

        Every attribute of this object holding a non-empty set or dict is
        treated as one change state. For each state the file types of its
        members are listed (lower-cased, de-duplicated and sorted so the
        output is reproducible) together with the location of the matching
        changelog_details file.

        Parameters
        ----------
        ifile : str
                path to CHANGELOG file that will be updated. The file must
                already exist, as it is opened in 'r+' mode.

        Returns
        -------
        None
        """
        now_str = self.dtime.strftime('%Y-%m-%d')
        now_str1 = self.dtime.strftime('%Y%m%d')

        lines_to_add = now_str + "\n\n"
        for state, value in self.__dict__.items():
            # only non-empty set/dict attributes describe a change state
            if not isinstance(value, (set, dict)) or not value:
                continue
            # a File object is created for each path only to get its type;
            # iterating a dict yields its keys
            types = sorted({File(name=p).guess_type().lower() for p in value})
            # get the changelog_details dir from config
            dirname = CONFIG.get('ctree', 'chlog_details_dir')
            lines_to_add += "Modification to: {0}\n\n".format(",".join(types))
            lines_to_add += "Details can be found in\n" \
                            "{0}/changelog_details_{1}_{2}\n\n".format(dirname,
                                                                       now_str1, state)
        # read the current content, rewind and write the new entry first
        with open(ifile, 'r+') as f:
            content = f.read()
            f.seek(0, 0)
            f.write(lines_to_add.rstrip('\r\n') + '\n\n' + content)
Exemplo n.º 10
0
def load_file(rand_file, db_obj):
    """
    Fixture that loads the file behind ``rand_file`` into the
    RESEQTRACK DB (with ``dry=False``) and returns ``rand_file.name``
    """
    print('Running fixture to load test file in the DB')

    db_entry = File(name=rand_file.name, type="TYPE_F")
    db_obj.load_file(db_entry, dry=False)

    print('Running fixture to load test file in the DB. DONE...')
    loaded_name = rand_file.name
    return loaded_name
Exemplo n.º 11
0
def test_load_f(db_obj, del_obj):
    """Load $DATADIR/test.txt in the DB; fails only if an exception is raised.

    The loaded ``File`` is appended to ``del_obj`` (presumably so the
    fixture removes it on teardown -- confirm against the fixture).
    """
    logging.getLogger('test_load_f').debug(
        "Testing 'load_file' function to load file entry in DB")

    test_path = os.getenv('DATADIR') + "/test.txt"
    loaded = File(name=test_path, type="TYPE_F")
    db_obj.load_file(loaded, dry=False)

    del_obj.append(loaded)
Exemplo n.º 12
0
def test_update_f(db_obj, del_obj):
    """Rename an entry of the 'file' table through ``update_file``.

    The entry is first loaded under $DATADIR/test.txt and its ``name`` is
    then changed to $DATADIR/test1.txt. All paths are built with
    ``os.path.join``: the original concatenated ``DATADIR + "test.txt"``
    without a separator, so the update targeted a different path from the
    one that had just been loaded.
    """
    log = logging.getLogger('test_update_f')

    log.debug("Testing \'update_file\' function to update an entry "
              "in the \'file\' table of the DB")

    data_dir = os.getenv('DATADIR')
    old_path = os.path.join(data_dir, "test.txt")
    new_path = os.path.join(data_dir, "test1.txt")

    # First, load file entry in the database
    f = File(name=old_path, type="TYPE_F")

    db_obj.load_file(f, dry=False)

    # Now, modify the file path for
    # entry in the 'file' table
    db_obj.update_file(attr_name='name',
                       value=new_path,
                       name=old_path,
                       dry=False)

    # the entry now lives under the new name; hand that one to del_obj
    f1 = File(name=new_path, type="TYPE_F")

    del_obj.append(f1)
Exemplo n.º 13
0
def load_file_list(rand_filelst, db_obj):
    """
    Fixture that loads every path listed in ``rand_filelst`` (one path
    per line) into the RESEQTRACK DB and returns ``rand_filelst``
    """
    print('Running fixture to load a list of test files in the DB')

    with open(rand_filelst) as listing:
        for line in listing:
            # drop the trailing newline before using the line as a path
            entry = File(name=line.rstrip("\n"), type="TYPE_F")
            db_obj.load_file(entry, dry=False)

    print('Running fixture to load a list of test files in the DB. DONE...')
    return rand_filelst
Exemplo n.º 14
0
def test_delete_f(db_obj):
    """Load $DATADIR/test.txt in the DB and delete it again.

    There are no asserts: the test fails only if an exception is raised.
    """
    logging.getLogger('test_delete_f').debug(
        "Testing 'delete_file' function to delete a file entry in the DB")

    test_path = os.getenv('DATADIR') + "/test.txt"
    entry = File(name=test_path, type="TYPE_F")

    # the entry has to be in the DB before it can be deleted
    db_obj.load_file(entry, dry=False)
    db_obj.delete_file(entry, dry=False)
Exemplo n.º 15
0
def push_file(rand_file, db_obj, conn_api):
    """
    Fixture that loads the file behind ``rand_file`` into the RESEQTRACK DB
    and pushes it to FIRE under the path ``test_arch.txt``.

    Returns the absolute path of ``rand_file.name``
    """
    print('Running fixture to push a test file to FIRE')

    local_file = File(name=rand_file.name, type="TYPE_F")
    db_obj.load_file(local_file, dry=False)
    conn_api.push_object(fileO=local_file,
                         fire_path="test_arch.txt",
                         dry=False)

    print('Running fixture to push a test file to FIRE. DONE...')
    pushed_abs_path = os.path.abspath(rand_file.name)
    return pushed_abs_path
Exemplo n.º 16
0
def loaded_obj():
    """Push $DATADIR/test.txt to FIRE as ``test_dir/test.txt`` (unpublished).

    Returns whatever ``api.push_object`` returns for the pushed file.
    """
    logging.getLogger('loaded_obj').debug('push FIRE object')

    # File object describing the local file to be pushed
    local_file = File(name=os.getenv('DATADIR') + "/test.txt",
                      type="TEST_F",
                      md5sum="f5aa4f4f1380b71acc56750e9f8ff825")

    return api.push_object(fileO=local_file,
                           dry=False,
                           fire_path="test_dir/test.txt",
                           publish=False)
Exemplo n.º 17
0
    def push_ctree(self, dry=True):
        """
        Replace the archived self.prod_tree with self.staging_tree.

        The steps run in this order:
        1) Copy the md5 and size of self.staging_tree onto the
           self.prod_tree entry in the DB
        2) Download the archived self.prod_tree as a backup file
        3) Delete the archived self.prod_tree
        4) Push self.staging_tree to the archive

        Parameters
        ----------
        dry: bool, default=True
             Passed on to the DB update, archive delete and archive
             push calls.

        Returns
        -------
        fire_path : str
                    Fire path of the pushed current.tree.

        Raises
        ------
        Exception
            If the archived file cannot be retrieved for the backup or
            cannot be fetched before its deletion.
        """
        new_tree = File(name=self.staging_tree)

        # refresh the DB metadata of the production entry
        for attr in ('md5', 'size'):
            self.db.update_file(attr, getattr(new_tree, attr),
                                self.prod_tree, dry=dry)

        # back up the archived production tree before touching it
        prod_basename = os.path.basename(self.prod_tree)
        prod_fire_path = f"{CONFIG.get('ctree', 'ctree_fpath')}/{prod_basename}"
        backup_path = (f"{CONFIG.get('ctree', 'backup')}/{prod_basename}."
                       f"{self.dtime}.backup")
        retrieved = self.api.retrieve_object(firePath=prod_fire_path,
                                             outfile=backup_path)
        if retrieved is None:
            raise Exception("No current.tree file retrieved from the archive")

        # the object id of the archived tree is needed to delete it
        archived = self.api.fetch_object(firePath=prod_fire_path)
        if archived is None:
            raise Exception("No current.tree file retrieved from the archive")
        self.api.delete_object(fireOid=archived.fireOid, dry=dry)

        # finally push the staging tree under its own basename
        staged_basename = os.path.basename(self.staging_tree)
        staged_fire_path = (f"{CONFIG.get('ctree', 'ctree_fpath')}/"
                            f"{staged_basename}")
        self.api.push_object(fileO=new_tree,
                             fire_path=staged_fire_path,
                             dry=dry)
        return staged_fire_path
Exemplo n.º 18
0
    def fetch_file(self, path=None, basename=None):
        """
        Function to fetch a certain entry from the table 'file' in
        self.dbname

        The lookup uses 'path' when it is given (exact match on its
        absolute form); otherwise 'basename' is used (match on names ending
        with it). When several rows match, only the first one is returned.

        Parameters
        ----------
        path : str, optional
               Path of file to be retrieved.
        basename : str, optional
                   Basename of file to be retrieved.

        Returns
        -------
        f : file.file.File or None
            object retrieved from DB
            None if no file was retrieved, if neither 'path' nor
            'basename' was given, or if fetching the rows failed.

        Raises
        ------
        pymysql.Error
            If the query cannot be executed. Errors raised while fetching
            the rows are logged and rolled back, and None is returned.
        """
        if path is not None:
            db_logger.debug(f"Fetching file with path: {path}")
            query = "SELECT * FROM file WHERE name = %s"
            params = [os.path.abspath(path)]
        elif basename is not None:
            db_logger.debug(f"Fetching file with basename: {basename}")
            query = "SELECT * FROM file WHERE name like %s"
            params = ['%' + basename]
        else:
            # Nothing to look up. The original reached fetchall() on a
            # cursor that had never executed a query and ended up returning
            # None through the error handler; report it directly instead.
            db_logger.error(
                "fetch_file called without 'path' or 'basename'")
            return None

        cursor = self.conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(query, params)
            try:
                result_set = cursor.fetchall()
            except pymysql.Error:
                db_logger.error("Exception occurred", exc_info=True)
                # Rollback in case there is any error
                self.conn.rollback()
                return None

            if not result_set:
                db_logger.debug(
                    f"No file retrieved from DB using path: {path}, "
                    f"basename: {basename}")
                return None

            # NOTE(review): the original had close()/commit() calls after
            # its loop that could never run, because the loop returned on
            # the first row. No commit is issued here, matching the
            # behaviour the original actually had.
            return File(**result_set[0])
        finally:
            # release the cursor on every exit path
            cursor.close()
Exemplo n.º 19
0
def test_push_object(del_obj):
    """
    Push $DATADIR/test.txt to FIRE without a fire_path and unpublished.

    There are no asserts: the test fails only if an exception is raised.
    The fireOid of the pushed object is appended to ``del_obj``.
    """
    logging.getLogger('test_push_object').debug(
        'Pushing (upload) a file.file.File object to FIRE')

    # File object describing the local file to be pushed
    local_file = File(name=os.getenv('DATADIR') + "/test.txt",
                      type="TEST_F",
                      md5sum="f5aa4f4f1380b71acc56750e9f8ff825")

    pushed = api.push_object(fileO=local_file, dry=False, publish=False)
    pushed_oid = pushed.fireOid

    del_obj.append(pushed_oid)
Exemplo n.º 20
0
def test_fetch_f_exists_w_path(db_obj, del_obj):
    """Check that a loaded file can be fetched back from the DB by its path.

    The fetched entry is expected to carry the absolute form of the path
    that was used to load it.
    """
    log = logging.getLogger('test_fetch_f_exists_w_path')

    # Adjacent literals are concatenated verbatim; the trailing space keeps
    # the message from reading "existingfile".
    log.debug('Testing \'fetch_file\' function to fetch an existing '
              'file from the DB using its path')

    rel_path = os.getenv('DATADIR') + "/test.txt"
    # First, load file entry in the database
    f = File(name=rel_path, type="TYPE_F")

    db_obj.load_file(f, dry=False)

    # path provided here points to the loaded file
    rf = db_obj.fetch_file(path=rel_path)

    del_obj.append(rf)

    assert rf.name == os.path.abspath(rel_path)
Exemplo n.º 21
0
    def push_chlog_details(self, pathlist, db, api, dry=True):
        """
        Push the changelog_details_* files to the archive.

        For every path in 'pathlist' this function will:

        1) Load the file in the DB with type CHANGELOG
        2) Push it (published) to the archive under the configured
           chlog_details_dir
        3) Update the name of its DB entry to the path under the
           configured ftp_mount

        Parameters
        ----------
        pathlist : list of str
                   List with paths of the changelog_details_* files
                   (resulting from running self.print_chlog_details).
        db : DB connection object.
        api : API connection object.
        dry: bool, default=True
             Passed on to every DB and API call.

        Returns
        -------
        pushed_files : list of str
                       list with the Fire paths of the pushed changelog_details_*.
        """
        ce_logger.info("Pushing changelog_details_* files to the archive")

        fire_paths = []
        for local_path in pathlist:
            fname = os.path.basename(local_path)
            chlog_file = File(name=local_path, type="CHANGELOG")
            # final location of the file below the FTP mount point
            ftp_path = (f"{CONFIG.get('ftp','ftp_mount')}"
                        f"{CONFIG.get('ctree','chlog_details_dir')}/{fname}")
            db.load_file(chlog_file, dry=dry)

            fire_path = f"{CONFIG.get('ctree', 'chlog_details_dir')}/{fname}"
            api.push_object(chlog_file,
                            dry=dry,
                            publish=True,
                            fire_path=fire_path)
            fire_paths.append(fire_path)

            # point the DB entry at the archived location
            db.update_file('name', ftp_path, chlog_file.name, dry=dry)

        return fire_paths
Exemplo n.º 22
0
def test_push_object_w_fpath(del_obj):
    """
    Push $DATADIR/test.txt to FIRE (unpublished) under the virtual path
    ``test_dir/test.txt``.

    There are no asserts: the test fails only if an exception is raised.
    The fireOid of the pushed object is appended to ``del_obj``.
    """
    logging.getLogger('test_push_object_w_fpath').debug(
        'Pushing (upload) a file.file.File object to FIRE adding a '
        'virtual FIRE path')

    # File object describing the local file to be pushed
    local_file = File(name=os.getenv('DATADIR') + "/test.txt",
                      type="TEST_F",
                      md5sum="f5aa4f4f1380b71acc56750e9f8ff825")

    pushed = api.push_object(fileO=local_file,
                             dry=False,
                             publish=False,
                             fire_path="test_dir/test.txt")
    pushed_oid = pushed.fireOid

    del_obj.append(pushed_oid)
Exemplo n.º 23
0
def test_push_comp_object_w_fpath(del_obj):
    """
    Push the compressed $DATADIR/test.txt.gz to FIRE under the virtual
    path ``test_dir/test.txt.gz``.

    There are no asserts: the test fails only if an exception is raised.
    The fireOid of the pushed object is appended to ``del_obj``.
    """
    logging.getLogger('test_push_comp_object_w_fpath').debug(
        'Pushing (upload) a compressed file.file.File object to FIRE adding a '
        'virtual FIRE path')

    # File object describing the local compressed file
    gz_file = File(name=os.getenv('DATADIR') + "/test.txt.gz",
                   type="TEST_F",
                   md5sum="a32c5f11391b49b0788def64d28f8807")

    pushed = api.push_object(fileO=gz_file,
                             dry=False,
                             fire_path="test_dir/test.txt.gz")
    pushed_oid = pushed.fireOid

    del_obj.append(pushed_oid)
Exemplo n.º 24
0
def push_file_list(rand_filelst, db_obj, conn_api):
    """
    Fixture that loads every path listed in ``rand_filelst`` (one path per
    line) into the RESEQTRACK DB and pushes it to FIRE, using the basename
    of the path as its FIRE path.

    Returns
    -------
    File with the list of file paths that have been pushed
    """
    print('Running fixture to push a list of test files to FIRE')

    with open(rand_filelst) as listing:
        for line in listing:
            # drop the trailing newline before using the line as a path
            local_path = line.rstrip("\n")
            entry = File(name=local_path, type="TYPE_F")
            db_obj.load_file(entry, dry=False)
            conn_api.push_object(fileO=entry,
                                 fire_path=os.path.basename(local_path),
                                 dry=False)

    print('Running fixture to push a list of test files to FIRE. DONE...')

    return rand_filelst
Exemplo n.º 25
0
    # Dearchiving flow for a single 'path': check that it is known to both the
    # DB and FIRE, download it, optionally compare md5sums, then delete the
    # FIRE object and the DB entry.
    abs_path = os.path.abspath(path)
    # FIRE path = absolute path with the configured ftp_mount prefix removed.
    # NOTE(review): the mount point is used as an unescaped, unanchored regex
    # pattern here -- confirm it never contains regex metacharacters.
    fire_path = re.sub(settingsO.get('ftp', 'ftp_mount') + "/", '', abs_path)
    # the file must have an entry in the DB
    dearch_f = db.fetch_file(path=abs_path)
    # NOTE(review): the checks below rely on 'assert', which is stripped when
    # Python runs with -O.
    assert dearch_f is not None, f"File entry with path {abs_path} does not exist in the DB. " \
                                 f"Can't proceed"
    # check if 'path' exists in FIRE
    dearch_fobj = api.fetch_object(firePath=fire_path)
    assert dearch_fobj is not None, f"File entry with firePath {fire_path} is not archived in FIRE. " \
                                    f"Can't proceed"
    # download the file into args.directory, keeping its basename
    logger.info(f"Downloading file to be dearchived: {abs_path}")
    basename = os.path.basename(abs_path)
    downloaded_path = os.path.join(args.directory, basename)
    api.retrieve_object(fireOid=dearch_fobj.fireOid,
                        outfile=downloaded_path)
    logger.info(f"Download completed!")

    # optional integrity check: md5 of the downloaded copy against the md5
    # recorded on the FIRE object
    if str2bool(args.md5check) is True:
        logger.info("Checking if the md5sum of the retrieved and archived"
                    " object matches")
        f = File(name=downloaded_path)
        assert f.md5 == dearch_fobj.objectMd5, "downloaded file and archived object md5sums do" \
                                               " not match. Can't continue"
        logger.info("md5sums match. Will continue dearchiving FIRE object")

    # delete FIRE object (args.dry is forwarded as the 'dry' flag)
    api.delete_object(fireOid=dearch_fobj.fireOid, dry=str2bool(args.dry))
    # finally, delete de-archived file from RESEQTRACK DB
    db.delete_file(dearch_f, dry=str2bool(args.dry))
Exemplo n.º 26
0
            # Update flow for a file that is already archived: delete the
            # archived FIRE object, then refresh the md5 and size of its DB
            # entry from the copy placed in the staging area.
            logger.info(
                f"It seems that file: {f} is already archived and --update_existing is True"
            )
            logger.info(f"Archived file will be updated with new file")

            # First, retrieve the FIRE object
            dearch_fobj = api.fetch_object(firePath=fire_path)

            # NOTE(review): 'assert' is stripped when Python runs with -O
            assert dearch_fobj is not None, f"Object with FIRE path: {fire_path} was not retrieved"

            # delete the FIRE object (args.dry is forwarded as the 'dry' flag)
            api.delete_object(fireOid=dearch_fobj.fireOid,
                              dry=str2bool(args.dry))

            # Create File object pointing to the file placed in the staging area
            f_in_staging = File(name=f)

            # now update the metadata of the DB entry named ftp_path with the
            # md5 of the staged file; a status code of 0 is treated as success
            status_code = db.update_file(attr_name='md5',
                                         value=f_in_staging.md5,
                                         name=ftp_path,
                                         dry=str2bool(args.dry))
            # NOTE(review): the two message fragments are joined without a
            # space ("'File'table") and "the" is doubled
            assert status_code == 0, "Something went wrong when updating the 'md5' field of the entry in the 'File'" \
                                     "table of the the DB"

            # same update for the size of the staged file
            status_code = db.update_file(attr_name='size',
                                         value=f_in_staging.size,
                                         name=ftp_path,
                                         dry=str2bool(args.dry))
            # NOTE(review): same message defects as in the md5 check above
            assert status_code == 0, "Something went wrong when updating the 'size' field of the entry in the 'File'" \
                                     "table of the the DB"
Exemplo n.º 27
0
    def update_CHANGELOG(self, chlog_p, db, api, dry=True):
        """
        Replace the CHANGELOG archived in FIRE with the updated local copy.

        The steps run in this order:

        1) Update the md5 and size of the CHANGELOG entry in the DB
        2) Download the archived CHANGELOG as a timestamped backup file
        3) Delete the archived CHANGELOG from FIRE
        4) Push the new (updated) CHANGELOG file to FIRE

        Parameters
        ----------
        chlog_p : str
                  path to updated CHANGELOG file that will be pushed to FIRE.
        db : DB connection object.
        api : API connection object.
        dry: bool, default=True
             Passed on to the DB update, FIRE delete and FIRE push calls.

        Returns
        -------
        str : Fire path of the updated CHANGELOG files

        Raises
        ------
        Exception
            If the archived CHANGELOG cannot be retrieved for the backup
            or cannot be fetched before its deletion.
        """
        # timestamp used in the name of the backup file
        stamp = self.dtime.now().strftime('%Y_%m_%dT%H%M%S')

        # describe the updated local file: md5 and size are recomputed here
        new_chlog = File(name=chlog_p)
        new_chlog.md5 = new_chlog.calc_md5()
        new_chlog.size = os.path.getsize(new_chlog.name)

        # name of the CHANGELOG entry in the DB: mount point + fire path
        db_entry_name = (f"{CONFIG.get('ftp', 'ftp_mount')}"
                         f"{CONFIG.get('ctree', 'chlog_fpath')}")
        for attr in ('md5', 'size'):
            db.update_file(attr, getattr(new_chlog, attr),
                           db_entry_name, dry=dry)

        ce_logger.info("Pushing updated CHANGELOG file to API")

        # keep a backup copy of the archived file before deleting it
        archived_path = CONFIG.get('ctree', 'chlog_fpath')
        backup_path = (
            f"{CONFIG.get('ctree','backup')}/{os.path.basename(chlog_p)}."
            f"{stamp}.backup")
        backup = api.retrieve_object(firePath=archived_path,
                                     outfile=backup_path)
        if backup is None:
            raise Exception("No CHANGELOG file retrieved from the archive")

        # the object id of the archived file is needed to delete it
        archived = api.fetch_object(
            firePath=CONFIG.get('ctree', 'chlog_fpath'))
        if archived is None:
            raise Exception("No CHANGELOG file retrieved from the archive")

        ce_logger.info("Delete CHANGELOG to be updated from the archive")
        api.delete_object(fireOid=archived.fireOid, dry=dry)

        ce_logger.info("Push updated CHANGELOG file to the archive")
        api.push_object(new_chlog,
                        dry=dry,
                        fire_path=CONFIG.get('ctree', 'chlog_fpath'))

        return f"{CONFIG.get('ctree','chlog_fpath')}"
Exemplo n.º 28
0
        "$DBPWD undefined. You need either to pass the password of the MYSQL "
        "server containing the RESEQTRACK database using the --pwd option or set a $DBPWD environment "
        "variable before running this script!")

# Abort before any DB work when the file given with --settings
# does not exist on disk
if not os.path.isfile(args.settings):
    raise Exception(
        f"Config file provided using --settings option({args.settings}) not found!"
    )

# Object used to talk to the Reseqtrack DB, built from the password and
# database name in 'pwd' and 'dbname'
db = DB(pwd=pwd, dbname=dbname)

if args.file:
    logger.info('File provided using -f, --file option')

    f = File(name=args.file)

    db.delete_file(f, dry=str2bool(args.dry))
elif args.list_file:
    logger.info('File with paths provided using -l, --list_file option')

    for path in args.list_file:
        path = path.rstrip("\n")
        cols = re.split(' +', path)
        if len(cols) > 1:
            raise Exception(
                "Incorrect number of columns in file used for --list_file. "
                "Check format. File should have a different path per line with no whitespaces "
                "in the path.")
        rf = db.fetch_file(path=path)
        if rf is None:
Exemplo n.º 29
0
    raise Exception(
        "$DBPWD undefined. You need either to pass the password of the MYSQL "
        "server containing the RESEQTRACK database using the --pwd option or set a $DBPWD environment "
        "variable before running this script!")

# Object used to talk to the Reseqtrack DB, built from the password and
# database name in 'pwd' and 'dbname'
db = DB(pwd=pwd, dbname=dbname)

# Items to be loaded; the -f/--file branch below appends File objects
# (not bare path strings) to this list
files = []
if args.file:
    logger.info('File provided using -f, --file option')

    if args.type is not None:
        logger.debug('Type provided using -t, --type option')
        f = File(name=args.file, type=args.type)
    else:
        logger.debug('No file type provided using -t, --type option')
        logger.debug('File type will be guessed from its file extension')
        f = File(name=args.file, settingsf=args.settings)
        ftype = f.guess_type()
        f.type = ftype
    files.append(f)

elif args.list_file:
    logger.info('File with paths provided using -l, --list_file option')

    for path in args.list_file:
        path = path.rstrip("\n")
        cols = re.split(' +', path)
        if len(cols) > 1: