Exemplo n.º 1
0
def read_device(device,use_cache=True):
    """Return the parsed ``.dev`` structure for *device*.

    A structure already held in ``device_struct_cache`` is reused when
    ``use_cache`` is true and the current time is still earlier than the
    entry's timestamp in ``device_timeouts`` plus ``CACHE_TIMEOUT``.
    Otherwise ``<device_dir>/<device>.dev`` is re-read through
    ``config.read_struct`` and both the cache entry and its timestamp are
    refreshed before returning.
    """
    cache_hit = (
        device in device_struct_cache
        and use_cache
        and time.time() < device_timeouts[device] + CACHE_TIMEOUT
    )
    if not cache_hit:
        dev_path = config.map['global']['device_dir'] + '/' + device + ".dev"
        device_struct_cache[device] = config.read_struct(dev_path)
        device_timeouts[device] = time.time()
    return device_struct_cache[device]
Exemplo n.º 2
0
def read_source(source,use_cache=True):
    """Return the parsed ``.src`` structure for *source*.

    A structure already held in ``source_struct_cache`` is reused when
    ``use_cache`` is true and the current time is still earlier than the
    entry's timestamp in ``source_timeouts`` plus ``CACHE_TIMEOUT``.
    Otherwise ``<source_dir>/<source>.src`` is re-read through
    ``config.read_struct`` and both the cache entry and its timestamp are
    refreshed before returning.
    """
    cache_hit = (
        source in source_struct_cache
        and use_cache
        and time.time() < source_timeouts[source] + CACHE_TIMEOUT
    )
    if not cache_hit:
        src_path = config.map['global']['source_dir'] + '/' + source + ".src"
        source_struct_cache[source] = config.read_struct(src_path)
        source_timeouts[source] = time.time()
    return source_struct_cache[source]
Exemplo n.º 3
0
    list : List available flows to run
    
    run <flow name> [--from <from time>] [--to <to time>] [--pretend] [--nocache] [--local] [--param name=value] [<source name> <device identifier>] 
        Run the given flow.  Optional time interval, default is past day.
        Optional source name and device identifier, default is all known devices.
        --pretend is for testing, doesn't actually add the tasks to the task manager
        --nocache redoes all of the steps instead of looking for cached steps
        --local makes no connections to the postgres database and manually runs the tasks, dumping the output in the current directory
            you must specify sourcename and device identifier
"""
elif sys.argv[0] == "list":
    # "list" sub-command: describe every *.flow file in the configured flow_dir.
    # NOTE(review): sys.argv[0] is compared against the sub-command name, so
    # argv has presumably been shifted before this point - confirm.
    for file in os.listdir(config.map["global"]["flow_dir"]):
        if file.endswith(".flow"):
            # Drop the 5-character ".flow" suffix.  When the entry contains no
            # "/", find() returns -1 and the slice therefore starts at 0.
            name = file[file.find("/") + 1 : -5]
            print "Flow name: " + name
            fmap = config.read_struct(config.map["global"]["flow_dir"] + "/" + file)
            print "\tSource type(s): [" + ",".join(fmap["source_types"]) + "]"
            print "\tTasks:\n\t\t" + "\n\t\t".join(fmap["tasks"])
            print ""
elif sys.argv[0] == "run":
    # "run" sub-command: the default interval is the last week up to now.
    # NOTE(review): the usage text says the default is the "past day", but
    # the code uses weeks=1 - confirm which one is intended.
    fromtime = datetime.now() - timedelta(weeks=1)
    totime = datetime.now()

    # check_arg is handed argv, the option name and a count of 1; the result
    # is unpacked as (ret, vals, new argv).
    # Presumably ret flags whether the option was present and vals holds its
    # value(s) - verify against utils.check_arg.
    ret, vals, sys.argv = utils.check_arg(sys.argv, "--from", 1)
    if ret:
        try:
            fromtime = utils.str_to_date(vals[0])
        except ValueError, msg:
            # An unparsable time is only reported; the default fromtime
            # is kept and execution continues.
            print "Bad from time: " + str(msg)

    ret, vals, sys.argv = utils.check_arg(sys.argv, "--to", 1)
def read_flow_file(fname):
    """Load a ``.flow`` definition and build its step/file dependency graph.

    *fname* is either a path ending in ``.flow`` or a bare flow name; a bare
    name is resolved to ``<flow_dir>/<fname>.flow`` using
    ``config.map['global']['flow_dir']``.

    The returned ``FlowDef`` carries:
      * ``name``         - the file name without directory and ``.flow`` suffix
      * ``source_types`` - taken verbatim from the definition
      * ``use_tmp``      - True when ``use_tmp`` is present and not ``'0'``
      * ``steps``        - one ``Step`` per entry of ``tasks``
      * ``files``        - one ``File`` per ``%O<n>`` marker in a step command
      * ``outputs``      - the files named by the optional ``outputs`` entry

    When the definition has an ``outputs`` entry, every step and file that no
    declared output depends on is removed and a message is printed for each.

    Raises ValueError when the definition cannot be loaded and Exception when
    no steps are left after pruning.  An unknown profile, an unresolved
    ``%I<n>`` input or a directory mismatch prints an error and terminates
    the process through ``sys.exit(1)``.
    """
    if not fname.endswith('.flow'):
        fname = config.map['global']['flow_dir']+'/'+fname+".flow"

    lmap = config.read_struct(fname)

    # Reduce the path to the bare flow name used in messages and flow.name.
    if '/' in fname:
        fname = fname[fname.rfind('/')+1:]
    if fname.endswith('.flow'):
        fname = fname[:-5]

    if lmap is None:
        raise ValueError("Could not load flow: "+fname)

    flow = FlowDef()
    flow.name = fname
    flow.source_types = lmap['source_types']
    flow.use_tmp = 'use_tmp' in lmap and lmap['use_tmp'] != '0'
    flow.files = []
    flow.steps = []
    flow.outputs = []

    # --- one Step per task ------------------------------------------------
    for task_name in lmap['tasks']:
        step = Step()
        step.name = task_name
        step.outputs = []
        step.inputs = []
        step.profile = config.getdict(lmap,task_name+'.profile','cpu_bound')
        # Only the part of each configured profile entry before the first
        # comma is compared.  split() keeps an entry without a comma intact,
        # whereas slicing up to find(',') == -1 would drop its last character.
        known_profiles = [entry.split(',', 1)[0]
                          for entry in config.map['mod_scheduler']['profiles']]
        if step.profile not in known_profiles:
            print("ERROR: profile tag %s not found in config files"%step.profile)
            sys.exit(1)

        step.cmd = lmap[task_name+'.cmd']
        flow.steps.append(step)

    # --- output files: every "%O<digit>" (optionally followed by "D") ------
    for step in flow.steps:
        cmd = step.cmd
        pos = cmd.find('%O')
        while pos != -1:
            produced = File()
            produced.cached = False
            step.outputs.append(produced)
            produced.src = step
            produced.dests = []
            produced.index = int(cmd[pos+2:pos+3])
            # A trailing 'D' marks the output as a directory.
            produced.directory = cmd[pos+3:pos+4] == 'D'
            flow.files.append(produced)
            pos = cmd.find('%O', pos+1)

    # --- input files: every "%I<digit>" is resolved through the map entry
    #     "<step>.I<digit>", whose value names "<producer>.O<digit>" ---------
    for step in flow.steps:
        cmd = step.cmd
        pos = cmd.find('%I')
        while pos != -1:
            outspec = lmap[step.name+'.'+cmd[pos+1:pos+3]]
            dot = outspec.find('.')
            producer_name = outspec[0:dot]
            found = False
            for candidate in flow.files:
                if (candidate.src.name == producer_name
                        and candidate.index == int(outspec[dot+2:dot+3])):
                    found = True
                    step.inputs.append(candidate)
                    candidate.dests.append((step,int(cmd[pos+2:pos+3])))
                    # NOTE(review): this fires when the consumer marks the
                    # input with 'D' but the producer did not; the message
                    # text reads the other way round - confirm the wording.
                    if cmd[pos+3:pos+4] == 'D' and not candidate.directory:
                        print("ERROR: Output file %d from %s was specified as a directory, but task %s believes its not"%(candidate.index,candidate.src.name,step.name))
                        sys.exit(1)
                    break

            if not found:
                print("ERROR: Couldn't find output "+outspec)
                sys.exit(1)
            pos = cmd.find('%I', pos+1)

    if 'outputs' in lmap:
        # Collect every declared output.  There is deliberately no early exit
        # from this loop: stopping at the first match would leave all other
        # declared outputs unmarked and get their steps pruned below.
        out_index = 0
        for candidate in flow.files:
            if candidate.src.name+'.O%d'%candidate.index in lmap['outputs']:
                candidate.dests.append(('OUTPUT',out_index))
                flow.outputs.append(candidate)
                out_index += 1

        # now prune any steps/files not needed
        for step in flow.steps:
            step.mark = False
        for candidate in flow.files:
            candidate.mark = False

        def mark_recursive(needed):
            # An already-marked file has had its ancestors visited (or they
            # are being visited); skipping it avoids repeated walks over
            # shared ancestors and endless recursion on a cycle.
            if needed.mark:
                return
            needed.src.mark = True
            needed.mark = True
            for upstream in needed.src.inputs:
                mark_recursive(upstream)

        for wanted in flow.outputs:
            mark_recursive(wanted)

        for step in flow.steps[:]:
            if not step.mark:
                print("Pruning step %s: no connection to output"%(step.name))
                flow.steps.remove(step)

        for candidate in flow.files[:]:
            if not candidate.mark:
                print("Pruning file %s.O%d : no connection to output"%(candidate.src.name,candidate.index))
                flow.files.remove(candidate)
    else:
        print("No outputs in flow definition, all tasks are run")

    def build_file_dep_str(node):
        # "<step>(<dep strings of the step's inputs>)[<output index>]"
        return node.src.name+'('+','.join([build_file_dep_str(f) for f in node.src.inputs])+')[%d]'%node.index

    for candidate in flow.files:
        candidate.stepchain = build_file_dep_str(candidate)

    if not flow.steps:
        raise Exception("ERROR: No steps left after pruning")

    for candidate in flow.files:
        if not candidate.dests:
            print("WARNING: Task %s.O%d is not connected to anything"%(candidate.src.name, candidate.index))

    return flow