Example #1
File: tasks.py Project: vswilliamson/TS
def update_dmfilestat_diskusage(resultpk):
    '''
    Task to update DMFileStat.diskspace for all DMFileStat objects associated with this resultpk.
    This task is launched at the end of pipeline execution.
    NOTE: This can be a long-lived task
    '''
    logid = {'logid':"%s" % ('tasks')}
    try:
        result = Results.objects.get(pk=resultpk)
        search_dirs = [result.get_report_dir(), result.experiment.expDir]
        cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=result.get_report_dir(), save_list=True)
        for dmtype in FILESET_TYPES:
            dmfilestat = result.get_filestat(dmtype)
            dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
    except SoftTimeLimitExceeded:
        logger.warn("Time exceeded update_diskusage for (%d) %s" % (resultpk,result.resultsName), extra = logid)
    except:
        raise
    
    try:
        disk_total = 0
        for dmfilestat in [ result.get_filestat(dmtype) for dmtype in FILESET_TYPES]:
            if dmfilestat.dmfileset.type == dmactions_types.SIG:
                dmfilestat.result.experiment.diskusage = dmfilestat.diskspace if dmfilestat.diskspace != None else 0
                dmfilestat.result.experiment.save()
            else:
                partial = dmfilestat.diskspace
                disk_total += int(partial) if partial != None else 0
                result.diskusage = disk_total
                result.save()
        # See dmaction._update_related_objects() which also updates Exp & Results diskusage fields
    except:
        logger.error(traceback.format_exc(), extra = logid)
        raise    
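The SoftTimeLimitExceeded handler above only fires if the function is registered as a Celery task with a soft time limit. A minimal sketch of that registration pattern follows; the app name, the 3600-second limit, and the loop body are illustrative assumptions, not values from the TS sources.

import time

from celery import Celery
from celery.exceptions import SoftTimeLimitExceeded

app = Celery("tasks")

@app.task(soft_time_limit=3600)  # hypothetical limit
def long_lived_task(resultpk):
    try:
        for _ in range(10000):
            time.sleep(1)  # stand-in for walking large report directories
    except SoftTimeLimitExceeded:
        # Celery raises this inside the task once the soft limit expires,
        # letting it log and exit cleanly instead of being killed outright.
        print("time limit exceeded for %s" % resultpk)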
Example #2
File: tasks.py Project: jcgrenier/TS
def update_dmfilestats_diskspace(dmfilestat):
    ''' Task to update DMFileStat.diskspace '''
    search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]
    try:
        cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())
        dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
    except:
        logger.exception(traceback.format_exc(), extra = logid)
Example #3
def update_diskspace(dmfilestat, cached=None):
    """Update diskspace field in dmfilestat object"""
    try:
        # search both results directory and raw data directory
        search_dirs = [
            dmfilestat.result.get_report_dir(),
            dmfilestat.result.experiment.expDir,
        ]

        if not cached:
            cached = dm_utils.get_walk_filelist(
                search_dirs, list_dir=dmfilestat.result.get_report_dir())

        total_size = 0

        # Create a list of files eligible to process
        # exclude onboard_results folder if thumbnail or if fullchip was reanalyzed from signal processing
        sigproc_results_dir = os.path.join(dmfilestat.result.get_report_dir(),
                                           "sigproc_results")
        exclude_onboard_results = dmfilestat.result.isThumbnail or (
            "onboard_results" not in os.path.realpath(sigproc_results_dir))

        for start_dir in search_dirs:
            to_process = []
            if os.path.isdir(start_dir):
                to_process, _ = dm_utils._file_selector(
                    start_dir,
                    dmfilestat.dmfileset.include,
                    dmfilestat.dmfileset.exclude,
                    [],
                    exclude_onboard_results,
                    add_linked_sigproc=True,
                    cached=cached,
                )

                # process files in list
                for path in to_process[1:]:
                    try:
                        # logger.debug("%d %s %s" % (j, 'diskspace', path), extra = logid)
                        if not os.path.islink(path):
                            total_size += os.lstat(path)[6]

                    except OSError as inst:  # os.path.islink/os.lstat raise OSError
                        if inst.errno == errno.ENOENT:
                            pass
                        else:
                            errmsg = "update_diskspace %s" % (inst)
                            logger.error(errmsg, extra=logid)

        diskspace = float(total_size) / (1024 * 1024)
    except:
        diskspace = None
        raise
    finally:
        dmfilestat.diskspace = diskspace
        dmfilestat.save()
    return diskspace
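The heart of update_diskspace is the accounting loop: sum the sizes of regular files, skip symlinks so linked data is not double-counted, and tolerate files that vanish between listing and stat. A standalone sketch of just that core (os.lstat(path)[6] is the st_size field):

import errno
import os

def diskspace_mb(paths):
    '''Sum the on-disk sizes of the given files, in MB, mirroring the loop above.'''
    total_size = 0
    for path in paths:
        try:
            if not os.path.islink(path):  # skip symlinks to avoid double-counting
                total_size += os.lstat(path).st_size
        except OSError as inst:
            if inst.errno != errno.ENOENT:  # a file deleted mid-scan is not an error
                raise
    return float(total_size) / (1024 * 1024)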
Example #4
def update_dmfilestats_diskspace(dmfilestat):
    ''' Task to update DMFileStat.diskspace '''
    search_dirs = [
        dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir
    ]
    try:
        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=dmfilestat.result.get_report_dir())
        dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
    except:
        logger.exception(traceback.format_exc(), extra=logid)
Example #5
File: tasks.py Project: vswilliamson/TS
def backfill_dmfilestats_diskspace():
    ''' Backfill records with DMFileStat.diskspace = None, one at a time
        These could be older data sets, or new ones where the update_diskusage task failed.
    '''
    dmfilestats = DMFileStat.objects.filter(diskspace=None, action_state='L', files_in_use='').order_by('-created')
    if dmfilestats.count() > 0:
        dmfilestat = dmfilestats[0]
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]
        try:
            cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir(), save_list=True)
            dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
        except:
            logger.error(traceback.format_exc(), extra = logid)
            raise
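Since backfill_dmfilestats_diskspace repairs at most one record per invocation, it only makes sense as a recurring job. A hypothetical Celery beat entry sketching that; the task path and the one-hour interval are assumptions, not TS configuration:

from datetime import timedelta

from celery import Celery

app = Celery("tasks")
app.conf.beat_schedule = {
    "backfill-dmfilestats-diskspace": {
        "task": "tasks.backfill_dmfilestats_diskspace",  # hypothetical task path
        "schedule": timedelta(hours=1),  # hypothetical interval
    },
}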
Example #6
def update_diskspace(dmfilestat, cached=None):
    '''Update diskspace field in dmfilestat object'''
    try:
        # search both results directory and raw data directory
        search_dirs = [
            dmfilestat.result.get_report_dir(),
            dmfilestat.result.experiment.expDir
        ]

        if not cached:
            cached = dm_utils.get_walk_filelist(
                search_dirs, list_dir=dmfilestat.result.get_report_dir())

        total_size = 0

        #Create a list of files eligible to process
        is_thumbnail = dmfilestat.result.isThumbnail
        for start_dir in search_dirs:
            to_process = []
            if os.path.isdir(start_dir):
                to_process, _ = dm_utils._file_selector(
                    start_dir,
                    dmfilestat.dmfileset.include,
                    dmfilestat.dmfileset.exclude, [],
                    is_thumbnail,
                    add_linked_sigproc=True,
                    cached=cached)

                #process files in list
                for path in to_process[1:]:
                    try:
                        #logger.debug("%d %s %s" % (j, 'diskspace', path), extra = logid)
                        if not os.path.islink(path):
                            total_size += os.lstat(path)[6]

                    except OSError as inst:  # os.path.islink/os.lstat raise OSError
                        if inst.errno == errno.ENOENT:
                            pass
                        else:
                            errmsg = "update_diskspace %s" % (inst)
                            logger.error(errmsg, extra=logid)

        diskspace = float(total_size) / (1024 * 1024)
    except:
        diskspace = None
        raise
    finally:
        dmfilestat.diskspace = diskspace
        dmfilestat.save()
    return diskspace
Example #7
def update_diskspace(dmfilestat, cached=None):
    '''Update diskspace field in dmfilestat object'''
    try:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

        if not cached:
            cached = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

        total_size = 0

        # Create a list of files eligible to process
        # exclude onboard_results folder if thumbnail or if fullchip was reanalyzed from signal processing
        sigproc_results_dir = os.path.join(dmfilestat.result.get_report_dir(), 'sigproc_results')
        exclude_onboard_results = dmfilestat.result.isThumbnail or ('onboard_results' not in os.path.realpath(sigproc_results_dir))

        for start_dir in search_dirs:
            to_process = []
            if os.path.isdir(start_dir):
                to_process, _ = dm_utils._file_selector(start_dir,
                                                        dmfilestat.dmfileset.include,
                                                        dmfilestat.dmfileset.exclude,
                                                        [],
                                                        exclude_onboard_results,
                                                        add_linked_sigproc=True,
                                                        cached=cached)

                # process files in list
                for path in to_process[1:]:
                    try:
                        # logger.debug("%d %s %s" % (j, 'diskspace', path), extra = logid)
                        if not os.path.islink(path):
                            total_size += os.lstat(path)[6]

                    except OSError as inst:  # os.path.islink/os.lstat raise OSError
                        if inst.errno == errno.ENOENT:
                            pass
                        else:
                            errmsg = "update_diskspace %s" % (inst)
                            logger.error(errmsg, extra=logid)

        diskspace = float(total_size) / (1024 * 1024)
    except:
        diskspace = None
        raise
    finally:
        dmfilestat.diskspace = diskspace
        dmfilestat.save()
    return diskspace
Example #8
def update_dmfilestat_diskusage(resultpk):
    """
    Task to update DMFileStat.diskspace for all DMFileStat objects associated with this resultpk.
    This task is launched at the end of pipeline execution.
    NOTE: This can be a long-lived task
    """
    logid = {"logid": "%s" % ("tasks")}
    try:
        result = Results.objects.get(pk=resultpk)
        search_dirs = [result.get_report_dir(), result.experiment.expDir]
        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=result.get_report_dir(), save_list=True)
        for dmtype in FILESET_TYPES:
            dmfilestat = result.get_filestat(dmtype)
            dmfilestat_utils.update_diskspace(dmfilestat,
                                              cached=cached_file_list)
    except SoftTimeLimitExceeded:
        logger.warn(
            "Time exceeded update_diskusage for (%d) %s" %
            (resultpk, result.resultsName),
            extra=logid,
        )
    except:
        raise

    try:
        disk_total = 0
        for dmfilestat in [
                result.get_filestat(dmtype) for dmtype in FILESET_TYPES
        ]:
            if dmfilestat.dmfileset.type == dmactions_types.SIG:
                dmfilestat.result.experiment.diskusage = (
                    dmfilestat.diskspace
                    if dmfilestat.diskspace != None else 0)
                dmfilestat.result.experiment.save()
            else:
                partial = dmfilestat.diskspace
                disk_total += int(partial) if partial != None else 0
                result.diskusage = disk_total
                result.save()
        # See dmaction._update_diskspace_and_diskusage() which also updates Exp & Results diskusage fields
    except Exception:
        logger.error(traceback.format_exc(), extra=logid)
        raise
Example #9
def get_file_list(dmfilestat):
    """Return list of files selected by this DMFileStat record and list of files to not process.
    There are some cases in which the list of selected files contains files which should not be
    processed.  Those are in the to_keep list."""
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    to_process = []
    to_keep = []
    try:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=dmfilestat.result.get_report_dir())
    except:
        # If this function has an error, this file set should be marked 'E'
        dmfilestat.setactionstate('E')
        logger.error(traceback.format_exc(), extra=logid)
        return (to_process, to_keep)

    try:
        # Determine if this file type is eligible to use a keep list
        kpatterns = _get_keeper_list(dmfilestat, '')

        # Create a list of files eligible to process
        is_thumbnail = dmfilestat.result.isThumbnail
        for start_dir in search_dirs:
            if os.path.isdir(start_dir):
                tmp_process, tmp_keep = dm_utils._file_selector(start_dir,
                                                                dmfilestat.dmfileset.include,
                                                                dmfilestat.dmfileset.exclude,
                                                                kpatterns,
                                                                is_thumbnail,
                                                                cached=cached_file_list)
                to_process += tmp_process
                to_keep += tmp_keep
            else:
                logger.error(traceback.format_exc(), extra=logid)
    except:
        logger.error(traceback.format_exc(), extra=logid)

    return (to_process, to_keep)
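get_file_list delegates the actual matching to dm_utils._file_selector, which applies the file set's include/exclude patterns to the cached walk. A standalone sketch of that selection step using fnmatch; the real helper additionally handles keeper patterns, thumbnails, and linked sigproc directories:

import fnmatch

def select_files(cached_file_list, include, exclude):
    '''Return paths matching any include pattern and no exclude pattern.'''
    selected = []
    for path in cached_file_list:
        if any(fnmatch.fnmatch(path, pat) for pat in exclude):
            continue
        if any(fnmatch.fnmatch(path, pat) for pat in include):
            selected.append(path)
    return selected

# e.g. select_files(files, include=['*.bam', '*.bai'], exclude=['*/plugin_out/*'])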
Example #10
File: dmactions.py Project: skner/TS
def get_file_list(dmfilestat):
    """Return list of files selected by this DMFileStat record and list of files to not process.
    There are some cases in which the list of selected files contains files which should not be
    processed.  Those are in the to_keep list."""
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra = logid)

    to_process = []
    to_keep = []
    try:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

        cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())
    except:
        # If this function has an error, this file set should be marked 'E'
        dmfilestat.setactionstate('E')
        logger.error(traceback.format_exc(), extra = logid)
        return (to_process, to_keep)

    try:
        #Determine if this file type is eligible to use a keep list
        kpatterns = _get_keeper_list(dmfilestat, '')

        #Create a list of files eligible to process
        is_thumbnail = dmfilestat.result.isThumbnail
        for start_dir in search_dirs:
            if os.path.isdir(start_dir):
                tmp_process, tmp_keep = dm_utils._file_selector(start_dir,
                                                     dmfilestat.dmfileset.include,
                                                     dmfilestat.dmfileset.exclude,
                                                     kpatterns,
                                                     is_thumbnail,
                                                     cached=cached_file_list)
                to_process += tmp_process
                to_keep += tmp_keep
            else:
                logger.error(traceback.format_exc(), extra = logid)
    except:
        logger.error(traceback.format_exc(), extra = logid)
        
    return (to_process, to_keep)
Example #11
def backfill_dmfilestats_diskspace():
    ''' Backfill records with DMFileStat.diskspace = None, one at a time
        These could be older data sets, or new ones where the update_diskusage task failed.
    '''
    dmfilestats = DMFileStat.objects.filter(
        diskspace=None, action_state='L', files_in_use='').order_by('-created')
    if dmfilestats.count() > 0:
        dmfilestat = dmfilestats[0]
        search_dirs = [
            dmfilestat.result.get_report_dir(),
            dmfilestat.result.experiment.expDir
        ]
        try:
            cached_file_list = dm_utils.get_walk_filelist(
                search_dirs,
                list_dir=dmfilestat.result.get_report_dir(),
                save_list=True)
            dmfilestat_utils.update_diskspace(dmfilestat,
                                              cached=cached_file_list)
        except:
            logger.error(traceback.format_exc(), extra=logid)
            raise
Example #12
def data_import(name, selected, username, copy_data=False, copy_report=True):
    ''' Data import main task.
        The selected dict maps each category to import to the path of its serialized JSON.
        The log file can be used to display progress on the webpage.
        Copy options:
            if copy_data=True, copy Signal Processing or Basecalling Input files to the local drive; otherwise mark these categories Archived
            if copy_report=True, copy Output files to the local drive; otherwise mark them Archived and copy/create report.pdf
    '''

    importing = ImportData(name, selected, username, copy_data, copy_report)
    importing.start()
    importing.log('Selected: %s, copy data: %s, copy report: %s.' % (importing.selected_str, copy_data, copy_report) )
    logger.info('[Data Import] (%s) Started import %s using %s.' % (name, importing.selected_str, importing.json_path) )

    # create DB records
    try:
        objs = load_serialized_json(importing.json_path, importing.createResult, importing.log, importing.add_warning)
        result = objs.get('results', None)
        exp = objs['experiment']
        importing.update_destinations(result, exp)
    except Exception as e:
        msg = traceback.format_exc()
        importing.fail(e, msg)
        logger.error(msg)
        return

    if result:
        result.dmfilestat_set.filter(dmfileset__type__in=selected.keys()).update(action_state='IG')
        EventLog.objects.add_entry(result, "Importing %s %s." % (name, importing.selected_str), username)

    # get list of files
    file_list = []
    if copy_data or copy_report:
        source_paths = set([c['src_path'] for c in importing.categories if c['copy_files']])
        file_list = get_walk_filelist(list(source_paths), list_dir=False, save_list=False)

    # calculate dmfilestat diskspace
    importing.update_diskspace(file_list, result)
    importing.log('Selected categories:' + json.dumps(importing.categories,indent=1))
    
    # destination validation
    try:
        validate_destination(importing.categories)
    except Exception as e:
        msg = traceback.format_exc()
        importing.fail(e, msg, result)
        logger.error(msg)

    # copy files to destination
    for category in importing.categories:
        dmtype = category['dmtype']
        source_dir = category['src_path']
        destination = category['dest_path']

        if result:
            dmfilestat = result.get_filestat(dmtype)
            dmfileset = dmfilestat.dmfileset
        else:
            dmfilestat = None
            dmfileset = DMFileSet.objects.get(version=RELVERSION, type=dmtype)

        # process files
        if category['copy_files']:
            importing.log('Start processing files for %s.' % dmtype)
    
            if not os.path.exists(source_dir):
                error = "Source path %s does not exist, exiting." % source_dir
                importing.fail(error,error,result)
                return
        
            try:
                copy_files_to_destination(source_dir, destination, dmfileset, file_list, importing.log, importing.add_warning)
            except Exception as e:
                msg = traceback.format_exc()
                importing.fail(e, msg, result)
                logger.error(msg)
                return

        elif dmtype == dmactions_types.OUT:
            # special case: importing Report as Archived (copy_report=False)
            try:
                generate_report_pdf(source_dir, result, dmfilestat, importing.log, importing.add_warning)
            except:
                importing.add_warning('Failed to generate report pdf')
                importing.log(traceback.format_exc())


    # update database objects; DM state is Local if files copied, otherwise Archived
    importing.log('Updating location of imported files')
    if result:
        for category in importing.categories:
            dmfilestat = result.get_filestat(category['dmtype'])
            if category['copy_files']:
                dmfilestat.action_state = 'L'
                dmfilestat.created = timezone.now()
            else:
                # data files left on media, need to update dmfilestat to archived location
                dmfilestat.action_state = 'AD'
                dmfilestat.archivepath=category['src_path']
            dmfilestat.diskspace = category['diskspace']
            dmfilestat.save()
    
        result.status = 'Completed'
        result.save()

    elif dmactions_types.SIG in selected.keys() and not copy_data:
        # only Sigproc imported (no dmfilestats) and data files not copied
        exp.expDir = os.path.dirname(importing.json_path)
        exp.save()

    # finish up
    importing.finish(result, exp)
    logger.info('[Data Import] (%s) Done.' % name)
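For orientation, a hypothetical invocation of data_import as documented above: selected maps each DM category being imported to the path of its serialized JSON. The run name and paths here are made up for illustration.

selected = {
    dmactions_types.SIG: '/mnt/archive/R_2023_run/serialized_R_2023_run.json',  # made-up path
    dmactions_types.OUT: '/mnt/archive/R_2023_run/serialized_R_2023_run.json',
}
data_import('R_2023_run', selected, 'ionadmin', copy_data=True, copy_report=True)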
Example #13
def search_for_files(dmfilestats, reset, report):
    '''Look for files for the given DM category still in the filesystem.
    This is a long-lived function, so we enable Ctrl-C interrupt to
    exit the loop and still write the log file.
    '''
    try:
        print("Ctrl-C to exit")
        tracking = []
        num_dmfs = len(dmfilestats)
        for i, dmfs in enumerate(dmfilestats):
            sys.stdout.write("\r%05d/%05d %s" %
                             (i + 1, num_dmfs, progress[i % 7]))
            sys.stdout.flush()
            to_process = []
            to_keep = []
            # For each dmfilestat object, check if files still exist in filesystem
            # 1. Do not rely on cache.filelist
            dirs = [
                dmfs.result.get_report_dir(), dmfs.result.experiment.expDir
            ]
            for start_dir in [dir for dir in dirs if os.path.isdir(dir)]:
                tmp_process, tmp_keep = _file_selector(
                    start_dir,
                    dmfs.dmfileset.include,
                    dmfs.dmfileset.exclude,
                    _get_keeper_list(dmfs, 'delete'),
                    dmfs.result.isThumbnail,
                    False,
                    cached=get_walk_filelist(dirs))
                to_process += tmp_process
                to_keep += tmp_keep

            orphans = list(set(to_process) - set(to_keep))
            logs = models.EventLog.objects.for_model(dmfs.result)
            # We only want to track those datasets with lots of files displaced.
            if len(orphans) > 10:
                #if dmfs.action_state in ['DD', 'AD']:   # Is it marked Deleted?
                if dmfs.action_state in ['DD']:  # Is it marked Deleted?
                    print "\nReport: %s" % (dmfs.result.resultsName)
                    print "Report Directory: %s" % dmfs.result.get_report_dir()
                    print "Status: %s" % 'Deleted' if dmfs.action_state == 'DD' else 'Archived'
                    print "Category: %s" % dmfs.dmfileset.type
                    print "Raw Data Directory: %s" % dmfs.result.experiment.expDir
                    print "No. files: %d" % len(orphans)
                    print "Action Date: %s" % logs[len(logs) - 1].created
                    print "Action Log: %s" % logs[len(logs) - 1].text
                    tracking.append({
                        'report':
                        dmfs.result.resultsName,
                        'report_dir':
                        dmfs.result.get_report_dir(),
                        'state':
                        'Deleted' if dmfs.action_state == 'DD' else 'Archived',
                        'rawdatadir':
                        dmfs.result.experiment.expDir,
                        'num_files':
                        len(orphans),
                        'reset':
                        reset,
                        'action_state':
                        dmfs.action_state,
                        'action_date':
                        '%s' % logs[len(logs) - 1].created,
                        'action_text':
                        logs[len(logs) - 1].text
                    })
                    if reset:
                        try:
                            print "Deleting the cached.filelist file"
                            cachefilename = os.path.join(
                                dmfs.result.get_report_dir(),
                                "cached.filelist")
                            if os.path.exists(cachefilename):
                                #os.unlink(cachefilename)
                                os.rename(cachefilename,
                                          cachefilename + ".hide")
                        except OSError:
                            print traceback.format_exc()
                        dmfs.action_state = "L" if dmfs.action_state == 'DD' else "SA"
                        dmfs.save()
                        print "Reset to %s: %s" % (dmfs.action_state,
                                                   dmfs.result.resultsName)

                    if not report:
                        for entry in orphans:
                            print entry
            elif len(orphans) > 0:
                if not report:
                    print "\rLeft-overs Report: %s" % dmfs.result.resultsName
                    for entry in orphans:
                        print entry

        sys.stdout.write("\n ")
    except (KeyboardInterrupt):
        pass
    except:
        print traceback.format_exc()
    finally:
        return tracking
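Because the finally clause returns tracking even after a Ctrl-C, the caller always receives a (possibly partial) result to persist as the log file the docstring mentions. A small sketch of writing it out; the filename is an assumption:

import json

def write_tracking_log(tracking, path='dm_orphan_report.json'):
    '''Dump the tracking entries collected by search_for_files to disk.'''
    with open(path, 'w') as fh:
        json.dump(tracking, fh, indent=2)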
Example #14
def search_for_files(dmfilestats, reset, report):
    '''Look for files for the given DM category still in the filesystem.
    This is a long-lived function, so we enable Ctrl-C interrupt to
    exit the loop and still write the log file.
    '''
    try:
        print ("Ctrl-C to exit")
        tracking = []
        num_dmfs = len(dmfilestats)
        for i, dmfs in enumerate(dmfilestats):
            sys.stdout.write("\r%05d/%05d %s" % (i + 1, num_dmfs, progress[i % 7]))
            sys.stdout.flush()
            to_process = []
            to_keep = []
            # For each dmfilestat object, check if files still exist in filesystem
            # 1. Do not rely on cache.filelist
            dirs = [dmfs.result.get_report_dir(), dmfs.result.experiment.expDir]
            for start_dir in [dir for dir in dirs if os.path.isdir(dir)]:
                tmp_process, tmp_keep = _file_selector(start_dir,
                                                       dmfs.dmfileset.include,
                                                       dmfs.dmfileset.exclude,
                                                       _get_keeper_list(dmfs, 'delete'),
                                                       dmfs.result.isThumbnail,
                                                       False,
                                                       cached=get_walk_filelist(dirs))
                to_process += tmp_process
                to_keep += tmp_keep

            orphans = list(set(to_process) - set(to_keep))
            logs = models.EventLog.objects.for_model(dmfs.result)
            # We only want to track those datasets with lots of files displaced.
            if len(orphans) > 10:
                # if dmfs.action_state in ['DD', 'AD']:   # Is it marked Deleted?
                if dmfs.action_state in ['DD']:   # Is it marked Deleted?
                    print "\nReport: %s" % (dmfs.result.resultsName)
                    print "Report Directory: %s" % dmfs.result.get_report_dir()
                    print "Status: %s" % 'Deleted' if dmfs.action_state == 'DD' else 'Archived'
                    print "Category: %s" % dmfs.dmfileset.type
                    print "Raw Data Directory: %s" % dmfs.result.experiment.expDir
                    print "No. files: %d" % len(orphans)
                    print "Action Date: %s" % logs[len(logs) - 1].created
                    print "Action Log: %s" % logs[len(logs) - 1].text
                    tracking.append({'report': dmfs.result.resultsName,
                                     'report_dir': dmfs.result.get_report_dir(),
                                     'state': 'Deleted' if dmfs.action_state == 'DD' else 'Archived',
                                     'rawdatadir': dmfs.result.experiment.expDir,
                                     'num_files': len(orphans),
                                     'reset': reset,
                                     'action_state': dmfs.action_state,
                                     'action_date': '%s' % logs[len(logs) - 1].created,
                                     'action_text': logs[len(logs) - 1].text})
                    if reset:
                        try:
                            print "Deleting the cached.filelist file"
                            cachefilename = os.path.join(dmfs.result.get_report_dir(), "cached.filelist")
                            if os.path.exists(cachefilename):
                                # os.unlink(cachefilename)
                                os.rename(cachefilename, cachefilename + ".hide")
                        except OSError:
                            print traceback.format_exc()
                        dmfs.action_state = "L" if dmfs.action_state == 'DD' else "SA"
                        dmfs.save()
                        print "Reset to %s: %s" % (dmfs.action_state, dmfs.result.resultsName)

                    if not report:
                        for entry in orphans:
                            print entry
            elif len(orphans) > 0:
                if not report:
                    print "\rLeft-overs Report: %s" % dmfs.result.resultsName
                    for entry in orphans:
                        print entry

        sys.stdout.write("\n ")
    except (KeyboardInterrupt):
        pass
    except:
        print traceback.format_exc()
    finally:
        return tracking
Example #15
def data_import(name, selected, username, copy_data=False, copy_report=True):
    ''' Data import main task.
        The selected dict maps each category to import to the path of its serialized JSON.
        The log file can be used to display progress on the webpage.
        Copy options:
            if copy_data=True, copy Signal Processing or Basecalling Input files to the local drive; otherwise mark these categories Archived
            if copy_report=True, copy Output files to the local drive; otherwise mark them Archived and copy/create report.pdf
    '''

    importing = ImportData(name, selected, username, copy_data, copy_report)
    importing.start()
    importing.log('Selected: %s, copy data: %s, copy report: %s.' %
                  (importing.selected_str, copy_data, copy_report))
    logger.info(
        '[Data Import] (%s) Started import %s using %s, copy data: %s, copy report: %s.'
        % (name, importing.selected_str, importing.json_path, copy_data,
           copy_report))

    # create DB records
    try:
        objs = load_serialized_json(importing.json_path,
                                    importing.createResult, importing.log,
                                    importing.add_warning)
        result = objs.get('results', None)
        exp = objs['experiment']
        importing.update_destinations(result, exp)
    except Exception as e:
        msg = traceback.format_exc()
        importing.fail(e, msg)

    if result:
        dmfilestats_to_import = result.dmfilestat_set.filter(
            dmfileset__type__in=selected.keys())
        # check if importing is allowed
        for dmfilestat in dmfilestats_to_import:
            if dmfilestat.in_process():
                error = "In Process: %s status is %s" % (
                    dmfilestat.dmfileset.type,
                    dmfilestat.get_action_state_display())
                importing.fail(error, error)

        # set status
        dmfilestats_to_import.update(action_state='IG')
        result.status = 'Importing'
        result.save()
        EventLog.objects.add_entry(
            result, "Importing %s %s." % (name, importing.selected_str),
            username)

    # get list of files
    file_list = []
    if copy_data or copy_report:
        source_paths = set(
            [c['src_path'] for c in importing.categories if c['copy_files']])
        file_list = get_walk_filelist(list(source_paths),
                                      list_dir=False,
                                      save_list=False)

    # calculate dmfilestat diskspace
    importing.update_diskspace(file_list, result)
    importing.log('Selected categories:' +
                  json.dumps(importing.categories, indent=1))

    # destination validation
    try:
        validate_destination(importing.categories)
    except Exception as e:
        msg = traceback.format_exc()
        importing.fail(e, msg, result)

    # copy files to destination
    for category in importing.categories:
        dmtype = category['dmtype']
        source_dir = category['src_path']
        destination = category['dest_path']

        if result:
            dmfilestat = result.get_filestat(dmtype)
            dmfileset = dmfilestat.dmfileset
        else:
            dmfilestat = None
            dmfileset = DMFileSet.objects.get(version=RELVERSION, type=dmtype)

        # process files
        if category['copy_files']:
            importing.log('Start processing files for %s.' % dmtype)

            if not os.path.exists(source_dir):
                error = "Source path %s does not exist, exiting." % source_dir
                importing.fail(error, error, result)

            try:
                copy_files_to_destination(source_dir, destination, dmfileset,
                                          file_list, importing.log,
                                          importing.add_warning)
            except Exception as e:
                msg = traceback.format_exc()
                importing.fail(e, msg, result)

        elif dmtype == dmactions_types.OUT:
            # special case: importing Report as Archived (copy_report=False)
            try:
                generate_report_pdf(source_dir, result, dmfilestat,
                                    importing.log, importing.add_warning)
            except:
                importing.add_warning('Failed to generate report pdf')
                importing.log(traceback.format_exc())

    # update database objects; DM state is Local if files copied, otherwise Archived
    importing.log('Updating location of imported files')
    if result:
        for category in importing.categories:
            dmfilestat = result.get_filestat(category['dmtype'])
            if category['copy_files']:
                dmfilestat.action_state = 'L'
                dmfilestat.created = timezone.now()
            else:
                # data files left on media, need to update dmfilestat to archived location
                dmfilestat.action_state = 'AD'
                dmfilestat.archivepath = category['src_path']
            dmfilestat.diskspace = category['diskspace']
            dmfilestat.save()

        result.status = 'Completed'
        result.save()

    elif dmactions_types.SIG in selected.keys() and not copy_data:
        # only Sigproc imported (no dmfilestats) and data files not copied
        exp.expDir = os.path.dirname(importing.json_path)
        exp.save()

    # finish up
    importing.finish(result, exp)
    logger.info('[Data Import] (%s) Done.' % name)
Example #16
def process_import(importing, copy_data, copy_report):
    # create DB records
    try:
        objs = load_serialized_json(
            importing.json_path, importing.createResult, importing.log, importing.add_warning)
        result = objs.get('results', None)
        exp = objs['experiment']
        importing.update_destinations(result, exp)
    except Exception as e:
        raise

    if result:
        dmfilestats_to_import = result.dmfilestat_set.filter(dmfileset__type__in=importing.dmtypes)
        # check if importing is allowed
        for dmfilestat in dmfilestats_to_import:
            if dmfilestat.action_state in ['AG', 'DG', 'EG', 'SA', 'SE', 'SD']:
                raise Exception("Cannot import %s when data is in process: %s" %
                                (dmfilestat.dmfileset.type, dmfilestat.get_action_state_display()))

        # set status
        dmfilestats_to_import.update(action_state='IG')
        result.status = 'Importing'
        result.save()
        EventLog.objects.add_entry(result, "Importing %s %s." %
                                   (importing.name, importing.selected_str), importing.user)

    # get list of files
    file_list = []
    if copy_data or copy_report:
        source_paths = set([c['src_path'] for c in importing.categories if c['copy_files']])
        file_list = get_walk_filelist(list(source_paths), list_dir=False, save_list=False)

    # calculate dmfilestat diskspace
    importing.update_diskspace(file_list)
    importing.log('Selected categories:' + json.dumps(importing.categories, indent=1))

    # destination validation
    try:
        validate_destination(importing.categories)
    except:
        raise

    # copy files to destination
    for category in importing.categories:
        dmtype = category['dmtype']
        source_dir = category['src_path']
        destination = category['dest_path']

        if result:
            dmfilestat = result.get_filestat(dmtype)
            dmfileset = dmfilestat.dmfileset
        else:
            dmfilestat = None
            dmfileset = DMFileSet.objects.get(version=RELVERSION, type=dmtype)

        # process files
        if category['copy_files']:
            importing.log('Start processing files for %s.' % dmtype)

            if not os.path.exists(source_dir):
                raise Exception("Source path %s does not exist, exiting." % source_dir)

            try:
                copy_files_to_destination(
                    source_dir, destination, dmfileset, file_list, importing.log, importing.add_warning)
            except:
                raise

        elif dmtype == dmactions_types.OUT:
            # special case: importing Report as Archived (copy_report=False)
            try:
                generate_report_pdf(source_dir, result, dmfilestat, importing.log, importing.add_warning)
            except:
                importing.add_warning('Failed to generate report pdf')
                importing.log(traceback.format_exc())

    # update database objects; DM state is Local if files copied, otherwise Archived
    importing.log('Updating location of imported files')
    if result:
        for category in importing.categories:
            dmfilestat = result.get_filestat(category['dmtype'])
            if category['copy_files']:
                dmfilestat.action_state = 'L'
                dmfilestat.created = timezone.now()
            else:
                # data files left on media, need to update dmfilestat to archived location
                dmfilestat.action_state = 'AD'
                dmfilestat.archivepath = category['src_path']
            dmfilestat.diskspace = category['diskspace']
            dmfilestat.save()

        result.status = 'Completed'
        result.save()

    elif dmactions_types.SIG in importing.dmtypes:
        if copy_data:
            # if any results exist for this data set, need to update their dmfilestats
            DMFileStat.objects.filter(
                dmfileset__type=dmactions_types.SIG, result__experiment=exp).update(action_state='L')
        else:
            # only Sigproc imported (no dmfilestats) and data files not copied
            exp.expDir = os.path.dirname(importing.json_path)
            exp.save()
Example #17
def _get_file_list_dict(dmfilestat, action, user, user_comment, msg_banner):
    '''
    This function generates a list of files to process.
    '''
    logid = {'logid': "%s" % ('dmactions')}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    if dmfilestat.isdeleted():
        errmsg = "The %s for %s are deleted" % (dmfilestat.dmfileset.type, dmfilestat.result.resultsName)
        logger.warn(errmsg, extra=logid)
        raise Exception(errmsg)
    elif dmfilestat.isarchived():
        if not os.path.exists(dmfilestat.archivepath):
            errmsg = "Cannot access backup location %s" % dmfilestat.archivepath
            logger.warn(errmsg, extra=logid)
            raise Exception(errmsg)
        else:
            # search archived directory
            search_dirs = [dmfilestat.archivepath]
    else:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

    # List of all files associated with the report
    cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

    # Determine if this file type is eligible to use a keep list
    kpatterns = _get_keeper_list(dmfilestat, action)

    # Create a list of files eligible to process
    list_of_file_dict = []
    is_thumbnail = dmfilestat.result.isThumbnail
    add_linked_sigproc = False if (
        action == DELETE or dmfilestat.dmfileset.type == dmactions_types.INTR) else True
    for start_dir in search_dirs:
        logger.debug("Searching: %s" % start_dir, extra=logid)
        to_process = []
        to_keep = []
        if os.path.isdir(start_dir):
            to_process, to_keep = dm_utils._file_selector(start_dir,
                                                          dmfilestat.dmfileset.include,
                                                          dmfilestat.dmfileset.exclude,
                                                          kpatterns,
                                                          is_thumbnail,
                                                          add_linked_sigproc,
                                                          cached=cached_file_list)
        logger.info("%d files to process at %s" %
                    (len(list(set(to_process) - set(to_keep))), start_dir), extra=logid)
        list_of_file_dict.append(
            {
                'pk': dmfilestat.id,
                'action': action,
                'archivepath': dmfilestat.archivepath,
                'start_dir': start_dir,
                'to_process': to_process,
                'to_keep': to_keep,
                'total_cnt': len(list(set(to_process) - set(to_keep))),
                'processed_cnt': 0,
                'total_size': 0,
                'user': user,
                'user_comment': user_comment,
                'lockfile': '',
                'msg_banner': msg_banner,
            }
        )
    return list_of_file_dict
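Each dictionary in the returned list pairs to_process with to_keep for one search directory; the files actually acted on are the set difference, which is exactly how total_cnt is computed above. A consumer-side helper might look like:

def files_to_act_on(file_dict):
    '''Selected files minus those protected by the keep list.'''
    return sorted(set(file_dict['to_process']) - set(file_dict['to_keep']))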
Example #18
def _process_fileset_task(dmfilestat, action, user, user_comment, lockfile, msg_banner):
    '''
    This function generates a list of files to process, then hands the list to a recursive
    celery task function.  The recursion continues until the list is empty.  The calling
    function exits immediately.
    '''
    logid = {'logid':"%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra = logid)

    if dmfilestat.isdeleted():
        errmsg = "The %s for %s are deleted" % (dmfilestat.dmfileset.type, dmfilestat.result.resultsName)
        logger.warn(errmsg, extra = logid)
        raise Exception(errmsg)
    elif dmfilestat.isarchived():
        if not os.path.exists(dmfilestat.archivepath):
            errmsg = "Cannot access backup location %s" % dmfilestat.archivepath
            logger.warn(errmsg, extra = logid)
            raise Exception(errmsg)
        else:
            # search archived directory
            search_dirs = [dmfilestat.archivepath]
    else:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

    # Create a lock file here to block any other actions on this report (see TS-8411)
    lock_id = "%s_%s" % (dmfilestat.result.resultsName, dm_utils.slugify(dmfilestat.dmfileset.type))
    locallock = TaskLock(lock_id, timeout=60) # short timeout in case lock release code doesn't get called

    if not(locallock.lock()):
        logger.warn("lock file exists: %s(%s)" % (lock_id, locallock.get()), extra = logid)
        # Release the task lock
        try:
            applock = TaskLock(lockfile)
            applock.unlock()
        except:
            logger.error(traceback.format_exc(), extra = logid)
        return

    logger.info("lock file created: %s(%s)" % (lock_id, locallock.get()), extra = logid)

    if action == ARCHIVE:
        dmfilestat.setactionstate('AG')
    elif action == DELETE:
        dmfilestat.setactionstate('DG')
    elif action == EXPORT:
        dmfilestat.setactionstate('EG')

    # List of all files associated with the report
    cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

    #Determine if this file type is eligible to use a keep list
    kpatterns = _get_keeper_list(dmfilestat, action)

    #Create a list of files eligible to process
    list_of_file_dict = []
    is_thumbnail = dmfilestat.result.isThumbnail
    add_linked_sigproc = False if (action == DELETE or dmfilestat.dmfileset.type == dmactions_types.INTR) else True
    for start_dir in search_dirs:
        logger.debug("Searching: %s" % start_dir, extra = logid)
        to_process = []
        to_keep = []
        if os.path.isdir(start_dir):
            to_process, to_keep = dm_utils._file_selector(start_dir,
                                                 dmfilestat.dmfileset.include,
                                                 dmfilestat.dmfileset.exclude,
                                                 kpatterns,
                                                 is_thumbnail,
                                                 add_linked_sigproc,
                                                 cached=cached_file_list)
        logger.info("%d files to process at %s" % (len(list(set(to_process) - set(to_keep))), start_dir), extra = logid)
        list_of_file_dict.append(
            {
                'pk':dmfilestat.id,
                'action':action,
                'archivepath':dmfilestat.archivepath,
                'start_dir':start_dir,
                'to_process':to_process,
                'to_keep':to_keep,
                'total_cnt':len(list(set(to_process) - set(to_keep))),
                'processed_cnt':0,
                'total_size':0,
                'user':user,
                'user_comment':user_comment,
                'lockfile':lockfile,
                'msg_banner':msg_banner,
            }
        )

    try:
        pfilename = set_action_param_var(list_of_file_dict)

        # Call the recursive celery task function to process the list
        _process_task.delay(pfilename)
            
    except:
        logger.error("We got an error here, _process_fileset_task", extra = logid)
        raise
    finally:
        if locallock:
            locallock.unlock()

    return
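TaskLock here acts as an exclusive, advisory lock keyed by report name and file-set type (see TS-8411). A standalone sketch of the underlying lock-file pattern, not the TS TaskLock implementation; note the same lock/try/finally-unlock shape as the function above:

import os

class FileLock(object):
    '''Advisory lock via atomic exclusive creation of a lock file.'''

    def __init__(self, lock_id, lockdir='/tmp'):
        self.path = os.path.join(lockdir, lock_id + '.lock')

    def lock(self):
        try:
            fd = os.open(self.path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            os.close(fd)
            return True
        except OSError:
            return False  # someone else holds the lock

    def unlock(self):
        try:
            os.remove(self.path)
        except OSError:
            pass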
Example #19
def process_import(importing, copy_data, copy_report):
    # create DB records
    try:
        objs = load_serialized_json(
            importing.json_path, importing.createResult, importing.log, importing.add_warning)
        result = objs.get('results', None)
        exp = objs['experiment']
        importing.update_destinations(result, exp)
    except Exception as e:
        raise

    if result:
        dmfilestats_to_import = result.dmfilestat_set.filter(dmfileset__type__in=importing.dmtypes)
        # check if importing is allowed
        for dmfilestat in dmfilestats_to_import:
            if dmfilestat.action_state in ['AG', 'DG', 'EG', 'SA', 'SE', 'SD']:
                raise Exception("Cannot import %s when data is in process: %s" %
                                (dmfilestat.dmfileset.type, dmfilestat.get_action_state_display()))

        # set status
        dmfilestats_to_import.update(action_state='IG')
        result.status = 'Importing'
        result.save()
        EventLog.objects.add_entry(result, "Importing %s %s." %
                                   (importing.name, importing.selected_str), importing.user)

    # get list of files
    file_list = []
    if copy_data or copy_report:
        source_paths = set([c['src_path'] for c in importing.categories if c['copy_files']])
        file_list = get_walk_filelist(list(source_paths), list_dir=False, save_list=False)

    # calculate dmfilestat diskspace
    importing.update_diskspace(file_list)
    importing.log('Selected categories:' + json.dumps(importing.categories, indent=1))

    # destination validation
    try:
        validate_destination(importing.categories)
    except:
        raise

    # copy files to destination
    for category in importing.categories:
        dmtype = category['dmtype']
        source_dir = category['src_path']
        destination = category['dest_path']

        if result:
            dmfilestat = result.get_filestat(dmtype)
            dmfileset = dmfilestat.dmfileset
        else:
            dmfilestat = None
            dmfileset = DMFileSet.objects.get(version=RELVERSION, type=dmtype)

        # process files
        if category['copy_files']:
            importing.log('Start processing files for %s.' % dmtype)

            if not os.path.exists(source_dir):
                raise Exception("Source path %s does not exist, exiting." % source_dir)

            try:
                copy_files_to_destination(
                    source_dir, destination, dmfileset, file_list, importing.log, importing.add_warning)
            except:
                raise

        elif dmtype == dmactions_types.OUT:
            # special case: importing Report as Archived (copy_report=False)
            try:
                generate_report_pdf(source_dir, result, dmfilestat, importing.log, importing.add_warning)
            except:
                importing.add_warning('Failed to generate report pdf')
                importing.log(traceback.format_exc())

    # update database objects; DM state is Local if files copied, otherwise Archived
    importing.log('Updating location of imported files')
    if result:
        for category in importing.categories:
            dmfilestat = result.get_filestat(category['dmtype'])
            if category['copy_files']:
                dmfilestat.action_state = 'L'
                dmfilestat.created = timezone.now()
            else:
                # data files left on media, need to update dmfilestat to archived location
                dmfilestat.action_state = 'AD'
                dmfilestat.archivepath = category['src_path']
            dmfilestat.diskspace = category['diskspace']
            dmfilestat.save()

        result.status = 'Completed'
        result.save()

    elif dmactions_types.SIG in importing.dmtypes:
        if copy_data:
            # if any results exist for this data set, need to update their dmfilestats
            DMFileStat.objects.filter(
                dmfileset__type=dmactions_types.SIG, result__experiment=exp).update(action_state='L')
        else:
            # only Sigproc imported (no dmfilestats) and data files not copied
            exp.expDir = os.path.dirname(importing.json_path)
            exp.save()
Example #20
def process_import(importing, copy_data, copy_report):
    # create DB records
    try:
        objs = load_serialized_json(
            importing.json_path,
            importing.createResult,
            importing.log,
            importing.add_warning,
        )
        result = objs.get("results", None)
        exp = objs["experiment"]
        importing.update_destinations(result, exp)
    except Exception as e:
        raise

    if result:
        dmfilestats_to_import = result.dmfilestat_set.filter(
            dmfileset__type__in=importing.dmtypes)
        # check if importing is allowed
        for dmfilestat in dmfilestats_to_import:
            if dmfilestat.action_state in ["AG", "DG", "EG", "SA", "SE", "SD"]:
                raise Exception(
                    "Cannot import %s when data is in process: %s" %
                    (dmfilestat.dmfileset.type,
                     dmfilestat.get_action_state_display()))

        # set status
        dmfilestats_to_import.update(action_state="IG")
        result.status = "Importing"
        result.save()
        EventLog.objects.add_entry(
            result,
            "Importing %s %s." % (importing.name, importing.selected_str),
            importing.user,
        )

    # get list of files
    file_list = []
    if copy_data or copy_report:
        source_paths = set(
            [c["src_path"] for c in importing.categories if c["copy_files"]])
        file_list = get_walk_filelist(list(source_paths),
                                      list_dir=False,
                                      save_list=False)

    # calculate dmfilestat diskspace
    importing.update_diskspace(file_list)
    importing.log("Selected categories:" +
                  json.dumps(importing.categories, indent=1))

    # destination validation
    try:
        validate_destination(importing.categories)
    except Exception:
        raise

    # copy files to destination
    for category in importing.categories:
        dmtype = category["dmtype"]
        source_dir = category["src_path"]
        destination = category["dest_path"]

        if result:
            dmfilestat = result.get_filestat(dmtype)
            dmfileset = dmfilestat.dmfileset
        else:
            dmfilestat = None
            dmfileset = DMFileSet.objects.get(version=RELVERSION, type=dmtype)

        # process files
        if category["copy_files"]:
            importing.log("Start processing files for %s." % dmtype)

            if not os.path.exists(source_dir):
                raise Exception("Source path %s does not exist, exiting." %
                                source_dir)

            try:
                copy_files_to_destination(
                    source_dir,
                    destination,
                    dmfileset,
                    file_list,
                    importing.log,
                    importing.add_warning,
                )
            except Exception:
                raise

            # for OIA results need sigproc_results link
            if dmtype == dmactions_types.BASE and os.path.exists(
                    os.path.join(destination, "onboard_results")):
                if result:
                    sigproc_results_link = os.path.join(
                        result.get_report_dir(), "sigproc_results")
                else:
                    sigproc_results_link = os.path.join(
                        destination, "sigproc_results")

                if not os.path.exists(sigproc_results_link):
                    os.symlink(
                        os.path.join(destination, "onboard_results",
                                     "sigproc_results"),
                        sigproc_results_link,
                    )

        elif dmtype == dmactions_types.OUT:
            # special case: importing Report as Archived (copy_report=False)
            try:
                generate_report_pdf(source_dir, result, dmfilestat,
                                    importing.log, importing.add_warning)
            except Exception:
                importing.add_warning("Failed to generate report pdf")
                importing.log(traceback.format_exc())

    # update database objects; DM state is Local if files copied, otherwise Archived
    importing.log("Updating location of imported files")
    if result:
        for category in importing.categories:
            dmfilestat = result.get_filestat(category["dmtype"])
            if category["copy_files"]:
                dmfilestat.action_state = "L"
                dmfilestat.created = timezone.now()
            else:
                # data files left on media, need to update dmfilestat to archived location
                dmfilestat.action_state = "AD"
                dmfilestat.archivepath = category["src_path"]
            dmfilestat.diskspace = category["diskspace"]
            dmfilestat.save()

        result.status = "Completed"
        result.save()

        # update all diskusage related database entries
        try:
            update_dmfilestat_diskusage(result.pk)
        except Exception:
            importing.log("Error updating diskusage")
            logger.error(traceback.format_exc())

    elif dmactions_types.SIG in importing.dmtypes:
        if copy_data:
            # if any results exist for this data set, need to update their dmfilestats
            DMFileStat.objects.filter(
                dmfileset__type=dmactions_types.SIG,
                result__experiment=exp).update(action_state="L")
        else:
            # only Sigproc imported (no dmfilestats) and data files not copied
            exp.expDir = os.path.dirname(importing.json_path)
            exp.save()
Example #21
File: dmactions.py Project: skner/TS
def _process_fileset_task(dmfilestat, action, user, user_comment, lockfile, msg_banner):
    '''
    This function generates a list of files to process, then hands the list to a recursive
    celery task function.  The recursion continues until the list is empty.  The calling
    function exits immediately.
    '''
    logid = {'logid':"%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra = logid)

    if dmfilestat.isdeleted():
        errmsg = "The %s for %s are deleted" % (dmfilestat.dmfileset.type, dmfilestat.result.resultsName)
        logger.warn(errmsg, extra = logid)
        raise Exception(errmsg)
    elif dmfilestat.isarchived():
        if not os.path.exists(dmfilestat.archivepath):
            errmsg = "Cannot access backup location %s" % dmfilestat.archivepath
            logger.warn(errmsg, extra = logid)
            raise Exception(errmsg)
        else:
            # search archived directory
            search_dirs = [dmfilestat.archivepath]
    else:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

    # Create a lock file here to block any other actions on this report (see TS-8411)
    lock_id = "%s_%s" % (dmfilestat.result.resultsName, dm_utils.slugify(dmfilestat.dmfileset.type))
    locallock = TaskLock(lock_id, timeout=60) # short timeout in case lock release code doesn't get called

    if not(locallock.lock()):
        logger.warn("lock file exists: %s(%s)" % (lock_id, locallock.get()), extra = logid)
        # Release the task lock
        try:
            applock = TaskLock(lockfile)
            applock.unlock()
        except:
            logger.error(traceback.format_exc(), extra = logid)
        return

    logger.info("lock file created: %s(%s)" % (lock_id, locallock.get()), extra = logid)

    if action == ARCHIVE:
        dmfilestat.setactionstate('AG')
    elif action == DELETE:
        dmfilestat.setactionstate('DG')
    elif action == EXPORT:
        dmfilestat.setactionstate('EG')

    # List of all files associated with the report
    cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

    #Determine if this file type is eligible to use a keep list
    kpatterns = _get_keeper_list(dmfilestat, action)

    #Create a list of files eligible to process
    list_of_file_dict = []
    is_thumbnail = dmfilestat.result.isThumbnail
    add_linked_sigproc = False if (action == DELETE or dmfilestat.dmfileset.type == dmactions_types.INTR) else True
    for start_dir in search_dirs:
        logger.debug("Searching: %s" % start_dir, extra = logid)
        to_process = []
        to_keep = []
        if os.path.isdir(start_dir):
            to_process, to_keep = dm_utils._file_selector(start_dir,
                                                 dmfilestat.dmfileset.include,
                                                 dmfilestat.dmfileset.exclude,
                                                 kpatterns,
                                                 is_thumbnail,
                                                 add_linked_sigproc,
                                                 cached=cached_file_list)
        logger.info("%d files to process at %s" % (len(list(set(to_process) - set(to_keep))), start_dir), extra = logid)
        list_of_file_dict.append(
            {
                'pk':dmfilestat.id,
                'action':action,
                'archivepath':dmfilestat.archivepath,
                'start_dir':start_dir,
                'to_process':to_process,
                'to_keep':to_keep,
                'total_cnt':len(list(set(to_process) - set(to_keep))),
                'processed_cnt':0,
                'total_size':0,
                'user':user,
                'user_comment':user_comment,
                'lockfile':lockfile,
                'msg_banner':msg_banner,
            }
        )

    try:
        pfilename = set_action_param_var(list_of_file_dict)

        # Call the recursive celery task function to process the list
        _process_task.delay(pfilename)
            
    except:
        logger.error("We got an error here, _process_fileset_task", extra = logid)
        raise
    finally:
        if locallock:
            locallock.unlock()

    return