def searchFiles(searchDir, opts, fileValidationSpecs, fileValidationTools, pipelineTools):
    """Walk ``searchDir``, match files against the validation specs and validate them.

    Every file whose real path matches a spec's ``regex`` gets a per-file dict
    built from the regex groups plus a deep copy of the spec. Files are grouped
    per ``processId`` (a required regex group) and per tool, then validated by
    running the external command registered in ``fileValidationTools`` for the
    file's extension. Files whose validation fails are dropped from the result.

    Args:
        searchDir: Directory tree to walk (symlinks are followed).
        opts: Options object; ``opts.validateOnlyLastModified`` selects whether
            only the newest file per tool/process is validated.
        fileValidationSpecs: List of spec dicts; each needs at least the keys
            ``regex``, ``tool`` and ``hashString`` and must yield a ``frameIdx``.
        fileValidationTools: Mapping of file extension -> validation command
            template (formatted with ``{file}``).
        pipelineTools: Mapping of known tool names -> tool descriptions.

    Returns:
        Dict mapping file hash -> file dict for all files that are not invalid.

    Raises:
        ValueError: For an uncompilable regex, a non-numeric ``processId``,
            an unknown tool, a missing required spec key, or two files that
            produce the same hash.
    """
    # Compile all spec regexes up front so a bad pattern fails immediately.
    regexes = {}
    for i, spec in enumerate(fileValidationSpecs):
        try:
            regexes[i] = re.compile(spec["regex"])
        except re.error:
            raise ValueError("Could not compile regex: %s" % spec["regex"])

    allFiles = {}
    filesPerProc = {}

    # Walk the directory and classify every file by the first matching spec.
    for dirpath, dirs, files in os.walk(searchDir, followlinks=True):

        for fileName in files:

            filePath = os.path.realpath(os.path.join(dirpath, fileName))

            # Try to match the path with all regexes until one matches.
            for specIdx, spec in enumerate(fileValidationSpecs):

                m = regexes[specIdx].search(filePath)
                if not m:
                    continue

                # The regex must provide a numeric 'processId' group.
                # IndexError: group missing; ValueError/TypeError: not an int.
                try:
                    processId = int(m.group("processId"))
                except (IndexError, ValueError, TypeError):
                    raise ValueError("Non convertable processId found in filePath %s" % filePath)

                if processId not in filesPerProc:
                    filesPerProc[processId] = {
                        "allFiles": [],
                        "tools": {tool: [] for tool in pipelineTools.keys()},
                    }

                # Build the per-file dict: regex groups first, then all values
                # from the spec (deep copy: each file needs its own instance).
                f = {}
                f.update(m.groupdict())
                f.update(copy.deepcopy(spec))

                # Initial guess; validation below fully determines this value.
                f.update({"status": "finished"})

                # Format all values again with the regex results.
                f = cF.formatAll(f, m.groupdict(), exceptKeys={"regex": None})

                # Every file must belong to a known pipeline tool.
                if "tool" in f:
                    tool = f["tool"]
                    if tool not in pipelineTools.keys():
                        raise ValueError("The tool %s is not in %s!" % (tool, str(pipelineTools.keys())))
                else:
                    raise ValueError("You need to define a 'tool' key for %s " % str(spec))

                # Make the file hash from the spec's hashString template.
                if "hashString" in spec:
                    f["hash"] = cF.makeUUID(spec["hashString"].format(**m.groupdict()))
                else:
                    # fixed message: the required key is 'hashString', not 'hash'
                    raise ValueError("You need to define a 'hashString' key for file %s " % str(spec))

                # Convert frameIdx to int (may come from a regex group).
                if "frameIdx" in f:
                    f["frameIdx"] = int(f["frameIdx"])
                else:
                    raise ValueError("You need to define a 'frameIdx' key for %s (or in regex!) " % str(spec))

                # Add the file to the per-process and per-tool lists.
                filesPerProc[processId]["allFiles"].append(f)
                filesPerProc[processId]["tools"][tool].append(f)

                # Hashes must be globally unique.
                if f["hash"] not in allFiles:
                    allFiles[f["hash"]] = f
                else:
                    raise ValueError("Found files with the same hash %s, %s, this should not happen!" % (f["absPath"], allFiles[f["hash"]]["absPath"]))

                break

    if not allFiles:
        print("We found no files in folder: %s to validate!" % searchDir)
        return allFiles

    # Sort each tool's files by modification time (oldest first).
    for procId, procFiles in filesPerProc.items():
        for tool, toolFiles in procFiles["tools"].items():
            procFiles["tools"][tool] = sorted(toolFiles, key=lambda fl: os.path.getmtime(fl["absPath"]))

    # Determine which files to validate.
    filesToValidate = []
    for procId, procFiles in filesPerProc.items():
        if opts.validateOnlyLastModified:
            # Only the newest file of each tool per process; all older files
            # are assumed valid.
            for tool, toolFiles in procFiles["tools"].items():
                if toolFiles:
                    filesToValidate.append(toolFiles[-1])
        else:
            filesToValidate += procFiles["allFiles"]

    # Validate each selected file with the command registered for its
    # extension; any failure marks the file "invalid" instead of aborting.
    for fIdx, fileDict in enumerate(filesToValidate):
        try:
            ext = os.path.splitext(fileDict["absPath"])[1]
            try:
                validateCmd = fileValidationTools[ext]
            except KeyError:
                print("No validation command found for extentsion of file: %s" % fileDict["absPath"])
                raise

            validateCmd = validateCmd.format(**{"file": fileDict["absPath"]})

            try:
                out = subprocess.check_output(validateCmd.split(" ")).decode('utf-8')
            except (subprocess.CalledProcessError, OSError):
                print("Validation command %s failed!" % validateCmd)
                raise

            if out not in ["finished", "recover"]:
                print("Validation output %s not in list ['finished','recover']" % out)
                # original code used a bare `raise` with no active exception,
                # which only "worked" by raising RuntimeError; raise explicitly.
                raise ValueError("Unexpected validation output: %s" % out)

            fileDict.update({"status": out})
        except Exception:
            # File is invalid; mark it so it is filtered out below.
            fileDict["status"] = "invalid"

    print("Validated last files of each tool in the pipeline: ", "\n".join([f["absPath"] + " --> " + f["status"] for f in filesToValidate]))

    # Filter all invalid files from the result.
    allFiles = dict(filter(lambda x: x[1]["status"] != "invalid", allFiles.items()))
    del filesPerProc
    return allFiles
def recoverFrames(opts, allFrames, framesPerIdx, pipelineTools):
    """Set up frame recovery from previously validated checkpoint-file info.

    Loads the checkpoint-file records from ``opts.validationFileInfo``, matches
    them (by hash) to the output files of every tool in every frame, marks
    tools whose output files all have a finished checkpoint as finished, and
    registers the file moves each frame needs to recover.

    Args:
        opts: Options object; ``opts.validationFileInfo`` is the path of the
            JSON file with validated checkpoint-file records (no-op if unset).
        allFrames: Iterable of frame dicts to set up for recovery.
        framesPerIdx: Mapping of frameIdx -> frame dict.
        pipelineTools: Mapping of toolName -> tool description; each has a
            "dependencies" list of tool names.

    Raises:
        ValueError: If two checkpoint files share the same hash, if a
            frameIdx is not numeric, or if a finished tool that an unfinished
            tool depends on is missing a checkpoint file.
    """

    def addFile(frame, fileInfo, parent=None):
        # Register the file move(s) needed for one checkpoint file.
        # Each file is consumed at most once (guarded by 'usedFile').
        if "usedFile" in fileInfo and fileInfo["usedFile"]:
            # this file has already been used
            return

        if fileInfo["status"] == STATUS_RECOVER:
            print("added File: %s (recover)" % fileInfo["relPath"])
            # add a file move to recover this file
            frame["fileMover"].append(fileInfo["fileMoveRecover"])
            # mark file as used
            fileInfo["usedFile"] = True

        elif fileInfo["status"] == STATUS_FINISHED:
            # finished files are only moved when an unfinished dependent
            # tool needs them
            if parent:
                print("added File: %s (finished, dependent)" % fileInfo["relPath"])
                frame["fileMover"].append(fileInfo["fileMoveDependent"])
                fileInfo["usedFile"] = True

    def addTool(frame, toolName, visitedTools, parentToolName=None):
        # Depth-first walk over the tool dependency graph; each tool is
        # visited at most once per frame (tracked in visitedTools).
        if toolName in visitedTools:
            return
        visitedTools.add(toolName)

        frameTool = frame["tools"][toolName]
        if frameTool["status"] != STATUS_FINISHED:

            # unfinished tool: add all its available checkpoint files ...
            for outFileProp in frameTool["outputFiles"]:
                if outFileProp["cpFile"] is not None:
                    addFile(frame, outFileProp["cpFile"], parentToolName)

            # ... and recurse into all dependent tools
            depTools = pipelineTools[toolName]["dependencies"]
            if depTools:
                for depTool in depTools:
                    addTool(frame, depTool, visitedTools, toolName)

        elif parentToolName:
            # finished tool with an unfinished parent: the parent needs this
            # tool's checkpoint files, so they must all exist
            for outFileProp in frameTool["outputFiles"]:
                if outFileProp["cpFile"] is None:
                    raise ValueError("""Trying to add non existant checkpoint file of output file %s in tool 
                        %s!""" % (str(outFileProp), toolName))
                addFile(frame, outFileProp["cpFile"], parentToolName)
        # finished tool without a parent: nothing to do

    if not opts.validationFileInfo:
        # no file info available -> nothing to recover
        return

    print("Setup recovery from file info===============================")
    # fixed: the path was passed as a second print() argument, leaving a
    # literal '%s' in the output; use %-formatting instead
    print("Using validation file: %s" % opts.validationFileInfo)
    checkpointFiles = cF.jsonLoad(opts.validationFileInfo)

    # Index all checkpoint files by hash (hashes must be unique).
    cpFiles = {"hash": {}, "all": []}
    for f in checkpointFiles:
        int(f["frameIdx"])  # validate early that frameIdx is numeric
        ha = f["hash"]

        cpFiles["all"].append(f)

        if ha in cpFiles["hash"]:
            raise ValueError("File %s and %s have the same hash!" % (f["absPath"], cpFiles["hash"][ha]["absPath"]))
        cpFiles["hash"][ha] = f

    print("===========================================================")

    print("Determine status of all tools =============================")
    # Match checkpoint files to each tool's output files; a tool is finished
    # when every one of its output files has a finished checkpoint file.
    for frameIdx, frame in framesPerIdx.items():

        for toolName, tool in frame["tools"].items():

            finishedOutFiles = 0
            for outFileProp in tool["outputFiles"]:

                ha = cF.makeUUID(outFileProp["hashString"])
                outFileProp["hash"] = ha

                if ha in cpFiles["hash"]:  # we found a checkpoint file
                    cpFile = cpFiles["hash"][ha]
                    absP = cpFile["absPath"]

                    # shorten very long paths for readability
                    print("Frame: %i " % frameIdx +
                          " checkpoint file matched:\n\t%s\n\thash: %s\n\tstatus: %s " % (
                              absP[:10] + '...' + absP[-20:] if len(absP) > 70 else absP, ha, cpFile["status"]))

                    outFileProp["cpFile"] = cpFile

                    if cpFile["status"] == STATUS_FINISHED:
                        finishedOutFiles += 1
                else:
                    outFileProp["cpFile"] = None

            # all output files finished -> tool is finished
            if finishedOutFiles == len(tool["outputFiles"]):
                tool["status"] = STATUS_FINISHED
                print("Tool: %s -> finished" % toolName)

    # NOTE(review): a commented-out dependency check over cpFilesPerFrame used
    # to live here (marking frames unrecoverable when files were missing); it
    # was dead code and has been removed.

    # Set up recovery for all frames: walk every pipeline tool once per frame.
    print("Setup pipeline tools with file info ========================")
    for frame in allFrames:
        for tool in pipelineTools.keys():
            addTool(frame, tool, set())

    print("===============================================================")