def graph_sum():
    prod = NumberProducer(1000)
    prod.name = 'NumberProducer'
    s = parallelSum()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', s, 'input')
    return graph

def esgf_workflow(source, worker, monitor=None, headers=None):
    graph = WorkflowGraph()

    # TODO: configure limit
    esgsearch = EsgSearch(
        url=wps_url(),
        search_url=source.get('url', 'https://esgf-data.dkrz.de/esg-search'),
        # 'facets' is accepted for backward compatibility
        constraints=source.get('constraints', source.get('facets')),
        query=source.get('query'),
        limit=source.get('limit', 100),
        search_type='File',
        distrib=source.get('distrib'),
        replica=source.get('replica'),
        latest=source.get('latest'),
        temporal=source.get('temporal'),
        start=source.get('start'),
        end=source.get('end'))
    esgsearch.set_monitor(monitor, 0, 10)
    download = Download(url=wps_url(), headers=headers)
    download.set_monitor(monitor, 10, 50)
    doit = GenericWPS(headers=headers, **worker)
    doit.set_monitor(monitor, 50, 100)

    graph.connect(esgsearch, esgsearch.OUTPUT_NAME,
                  download, download.INPUT_NAME)
    graph.connect(download, download.OUTPUT_NAME, doit, doit.INPUT_NAME)

    result = simple_process.process(graph, inputs={esgsearch: [{}]})

    status_location = result.get((doit.id, doit.STATUS_LOCATION_NAME))[0]
    status = result.get((doit.id, doit.STATUS_NAME))[0]
    return dict(worker=dict(status_location=status_location, status=status))

def create_pipeline(chain, name_prefix='SeismoStreamPE_', name_suffix=''):
    '''
    Creates a composite PE wrapping a pipeline that processes obspy streams.

    :param chain: list of functions that process obspy streams. Each function
        takes one input parameter, stream, and returns an output stream. An
        entry may also be a pair (function, params) supplying extra
        parameters to the PE.
    :param name_prefix: prefix for the name of each created PE
    :param name_suffix: suffix for the name of each created PE
    :rtype: WorkflowGraph exposing the inputs and outputs of the composite PE
        that was created
    '''
    prev = None
    first = None
    graph = WorkflowGraph()
    for fn_desc in chain:
        try:
            fn = fn_desc[0]
            params = fn_desc[1]
        except TypeError:
            fn = fn_desc
            params = {}
        pe = SeismoStreamPE(fn, params)
        pe.name = name_prefix + fn.__name__ + name_suffix
        if prev:
            graph.connect(prev, SeismoStreamPE.OUTPUT_NAME,
                          pe, SeismoStreamPE.INPUT_NAME)
        else:
            first = pe
        prev = pe
    # Map inputs and outputs of the wrapper to the nodes in the subgraph
    graph.inputmappings = {'input': (first, SeismoStreamPE.INPUT_NAME)}
    graph.outputmappings = {'output': (prev, SeismoStreamPE.OUTPUT_NAME)}
    return graph

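# Usage sketch for create_pipeline. Assumption: detrend and taper below are
# hypothetical stream-processing functions, defined here only to show the two
# accepted entry shapes (a bare function, or a (function, params) pair); how
# params reach the function is up to SeismoStreamPE, which is defined elsewhere.
def detrend(stream):
    return stream

def taper(stream, max_percentage=0.05):
    return stream

example_pipeline = create_pipeline([detrend, (taper, {'max_percentage': 0.1})])
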
def createWf():
    graph = WorkflowGraph()
    plot = CompMatrix(variables_number)
    mc = MaxClique(-0.01)
    plot.numprocesses = 4
    # plot.prov_cluster = "my"
    start = Start()
    # start.prov_cluster = "my"
    sources = {}
    inputs_stock = {0: {'name': 'VNET'}, 1: {'name': 'AGTK'}}
    componentsType['VNET'] = ProvenanceStockType

    for i in range(0, variables_number):
        sources[i] = Source(sampling_rate, i, inputs_stock[i]['name'])
        sources[i].prov_cluster = "obs_cluster"

    for h in range(0, variables_number):
        graph.connect(start, 'output', sources[h], 'iterations')
        for j in range(h + 1, variables_number):
            cc = CorrCoef(batch_size, (h, j))
            cc.prov_cluster = "obs_cluster"
            cc.name = sources[h].name + "_" + sources[j].name
            plot._add_input('input_' + str(h) + '_' + str(j), grouping=[1])
            # graph.connect(sources[h], 'output', cc, 'input1')
            # graph.connect(sources[j], 'output', cc, 'input2')
            # graph.connect(cc, 'output', plot, 'input_' + str(h) + '_' + str(j))
            cc.single = True
            # cc.numprocesses = 1

    # graph.connect(plot, 'output', mc, 'matrix')
    return graph

def testContinuousReduce():
    prod = NumberProducer()
    test = TestPE()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', test, 'input')
    results = simple_process.process_and_return(graph, {prod: 5})
    tools.eq_({test.id: {'output': [[0] for i in range(5)]}}, results)

def esgf_workflow(source, worker, monitor=None, headers=None):
    graph = WorkflowGraph()

    # TODO: configure limit
    esgsearch = EsgSearch(
        url=wps_url(),
        search_url=source.get('url', 'https://esgf-data.dkrz.de/esg-search'),
        # 'facets' is accepted for backward compatibility
        constraints=source.get('constraints', source.get('facets')),
        query=source.get('query'),
        limit=source.get('limit', 100),
        search_type='File',
        distrib=source.get('distrib'),
        replica=source.get('replica'),
        latest=source.get('latest'),
        temporal=source.get('temporal'),
        start=source.get('start'),
        end=source.get('end'))
    esgsearch.set_monitor(monitor, 0, 10)
    download = Download(url=wps_url(), headers=headers)
    download.set_monitor(monitor, 10, 50)
    doit = GenericWPS(headers=headers, **worker)
    doit.set_monitor(monitor, 50, 100)

    graph.connect(esgsearch, esgsearch.OUTPUT_NAME,
                  download, download.INPUT_NAME)
    graph.connect(download, download.OUTPUT_NAME, doit, doit.INPUT_NAME)

    result = simple_process.process_and_return(graph, inputs={esgsearch: [{}]})

    status_location = result[doit.id][doit.STATUS_LOCATION_NAME][0]
    status = result[doit.id][doit.STATUS_NAME][0]
    return dict(worker=dict(status_location=status_location, status=status))

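# Usage sketch for esgf_workflow. Assumptions: the 'constraints' string and
# the worker dict below are illustrative placeholders; `worker` is expanded
# into GenericWPS keyword arguments, so its keys must match whatever that PE
# accepts in the surrounding codebase.
source = {'constraints': 'project:CMIP5,variable:tas', 'limit': 10}
worker = {'identifier': 'wordcounter'}  # hypothetical WPS process identifier
outcome = esgf_workflow(source, worker)
print(outcome['worker']['status'])
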
def testConsumer():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = PrintDataConsumer()
    graph.connect(prod, 'output', cons, 'input')
    results = simple_process.process_and_return(graph, {prod: 10})
    tools.eq_({}, results)

def testConsumer():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = PrintDataConsumer()
    graph.connect(prod, "output", cons, "input")
    results = simple_process.process_and_return(graph, {prod: 10})
    tools.eq_({}, results)

def create_iterative_chain(functions,
                           FunctionPE_class=SimpleFunctionPE,
                           name_prefix='PE_',
                           name_suffix=''):
    prev = None
    first = None
    graph = WorkflowGraph()
    for fn_desc in functions:
        try:
            fn = fn_desc[0]
            params = fn_desc[1]
        except TypeError:
            fn = fn_desc
            params = {}
        # print('adding %s to chain' % fn.__name__)
        pe = FunctionPE_class()
        pe.compute_fn = fn
        pe.params = params
        pe.name = name_prefix + fn.__name__ + name_suffix
        if prev:
            graph.connect(prev, IterativePE.OUTPUT_NAME,
                          pe, IterativePE.INPUT_NAME)
        else:
            first = pe
        prev = pe
    # Map inputs and outputs of the wrapper to the nodes in the subgraph
    graph.inputmappings = {'input': (first, IterativePE.INPUT_NAME)}
    graph.outputmappings = {'output': (prev, IterativePE.OUTPUT_NAME)}
    return graph

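# Usage sketch for create_iterative_chain. Assumption: addTwo and multiplyBy
# are hypothetical helpers defined here only to show both accepted entry
# shapes; params are stored on the PE and applied by FunctionPE_class.
def addTwo(data):
    return data + 2

def multiplyBy(data, factor=2):
    return data * factor

example_chain = create_iterative_chain([addTwo, (multiplyBy, {'factor': 3})])
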
def testIterative():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = TestIterative()
    graph.connect(prod, "output", cons, "input")
    results = simple_process.process_and_return(graph, {prod: 25})
    tools.eq_({cons.id: {"output": list(range(1, 26))}}, results)

def testWriter():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons1 = TestOneInOneOutWriter()
    graph.connect(prod, "output", cons1, "input")
    results = simple_process.process_and_return(graph, {prod: 5})
    tools.eq_({cons1.id: {"output": list(range(1, 6))}}, results)

def test_types():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = TestOneInOneOut()
    graph.connect(prod, "output", cons, "input")
    graph.propagate_types()
    tools.eq_(prod.outputconnections["output"]["type"],
              cons.inputconnections["input"]["type"])

def main():
    if len(sys.argv) < 5:
        print("Incorrect arguments provided. Proper format: "
              "python tupleCounter.py <inputFile> <numRepeats> "
              "<outputFile> <numCores>")
        sys.exit()

    inputFilename = sys.argv[1]
    numRepeats = int(sys.argv[2])
    outputFile = sys.argv[3]
    numCores = int(sys.argv[4])

    producer = TupleProducer(inputFilename, numRepeats)
    makeDicts = MakeDict()
    collector = CollectCounts(outputFile)

    graph = WorkflowGraph()
    graph.connect(producer, 'output', makeDicts, 'input')
    graph.connect(makeDicts, 'output', collector, 'input')

    from dispel4py.new.multi_process import process as multi_process
    import argparse
    args = argparse.Namespace()
    args.num = numCores
    args.simple = False
    multi_process(graph, {producer: 1}, args)

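# A minimal entry point plus an example invocation; the file names below are
# hypothetical placeholders:
#     python tupleCounter.py input.txt 3 counts.txt 4
if __name__ == '__main__':
    main()
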
def testIterative():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = TestIterative()
    graph.connect(prod, 'output', cons, 'input')
    results = simple_process.process_and_return(graph, {prod: 25})
    tools.eq_({cons.id: {'output': list(range(1, 26))}}, results)

def testWriter():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons1 = TestOneInOneOutWriter()
    graph.connect(prod, 'output', cons1, 'input')
    results = simple_process.process_and_return(graph, {prod: 5})
    tools.eq_({cons1.id: {'output': list(range(1, 6))}}, results)

def testWordCount():
    prod = RandomWordProducer()
    filt = RandomFilter()
    count = WordCounter()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', filt, 'input')
    graph.connect(filt, 'output', count, 'input')
    simple_process.process(graph, inputs={prod: 100})

def testWordCount():
    prod = RandomWordProducer()
    filt = RandomFilter()
    count = WordCounter()
    graph = WorkflowGraph()
    graph.connect(prod, "output", filt, "input")
    graph.connect(filt, "output", count, "input")
    simple_process.process(graph, inputs={prod: 100})

def test_types():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons = TestOneInOneOut()
    graph.connect(prod, 'output', cons, 'input')
    graph.propagate_types()
    tools.eq_(prod.outputconnections['output']['type'],
              cons.inputconnections['input']['type'])

def parallelAvg(index=0):
    composite = WorkflowGraph()
    parAvg = AverageParallelPE(index)
    reduceAvg = AverageReducePE()
    composite.connect(parAvg, parAvg.OUTPUT_NAME,
                      reduceAvg, reduceAvg.INPUT_NAME)
    composite.inputmappings = {'input': (parAvg, parAvg.INPUT_NAME)}
    composite.outputmappings = {'output': (reduceAvg, reduceAvg.OUTPUT_NAME)}
    return composite

def graph_min_max():
    prod = NumberProducer(1000)
    mi = parallelMin()
    ma = parallelMax()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', mi, 'input')
    graph.connect(prod, 'output', ma, 'input')
    return graph

def testOnetoAll():
    graph = WorkflowGraph()
    prod = t.TestProducer()
    cons = t.TestOneInOneOut()
    cons.numprocesses = 2
    # 'all' grouping: every instance of cons receives a copy of each data item
    cons.inputconnections["input"]["grouping"] = "all"
    graph.connect(prod, "output", cons, "input")
    return graph

def parallelStdDev(index=0):
    composite = WorkflowGraph()
    parStdDev = StdDevPE(index)
    reduceStdDev = StdDevReducePE()
    composite.connect(parStdDev, parStdDev.OUTPUT_NAME,
                      reduceStdDev, reduceStdDev.INPUT_NAME)
    composite.inputmappings = {'input': (parStdDev, parStdDev.INPUT_NAME)}
    composite.outputmappings = {'output': (reduceStdDev, reduceStdDev.OUTPUT_NAME)}
    return composite

def testOnetoAll():
    graph = WorkflowGraph()
    prod = t.TestProducer()
    cons = t.TestOneInOneOut()
    cons.numprocesses = 2
    # 'all' grouping: every instance of cons receives a copy of each data item
    cons.inputconnections['input']['grouping'] = 'all'
    graph.connect(prod, 'output', cons, 'input')
    return graph

def testWriter():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons1 = TestOneInOneOutWriter()
    graph.connect(prod, 'output', cons1, 'input')
    results = simple_process.process_and_return(
        graph, {prod: [{}, {}, {}, {}, {}]})
    tools.eq_({cons1.id: {'output': [1, 2, 3, 4, 5]}}, results)

def testTee():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(prod, 'output', cons2, 'input')
    multiprocess(graph, 3, [{}, {}, {}])

def parallel_aggregate(instPE, reducePE):
    composite = WorkflowGraph()
    reducePE.inputconnections[AggregatePE.INPUT_NAME]['grouping'] = 'global'
    reducePE.numprocesses = 1
    composite.connect(instPE, AggregatePE.OUTPUT_NAME,
                      reducePE, AggregatePE.INPUT_NAME)
    composite.inputmappings = {'input': (instPE, AggregatePE.INPUT_NAME)}
    composite.outputmappings = {'output': (reducePE, AggregatePE.OUTPUT_NAME)}
    return composite

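# Usage sketch for parallel_aggregate. Assumption: SumPE and SumReducePE are
# hypothetical AggregatePE subclasses with the standard INPUT_NAME/OUTPUT_NAME
# ports; any map/reduce pair wired this way would work.
parallel_sum_pe = parallel_aggregate(SumPE(), SumReducePE())
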
def parallelAvg(index=0):
    composite = WorkflowGraph()
    parAvg = AverageParallelPE(index)
    reduceAvg = AverageReducePE()
    composite.connect(parAvg, parAvg.OUTPUT_NAME,
                      reduceAvg, reduceAvg.INPUT_NAME)
    composite.inputmappings = {'input': (parAvg, parAvg.INPUT_NAME)}
    composite.outputmappings = {'output': (reduceAvg, reduceAvg.OUTPUT_NAME)}
    return composite

def parallel_aggregate(instPE, reducePE):
    composite = WorkflowGraph()
    reducePE.inputconnections[AggregatePE.INPUT_NAME]['grouping'] = 'global'
    reducePE.numprocesses = 1
    composite.connect(instPE, AggregatePE.OUTPUT_NAME,
                      reducePE, AggregatePE.INPUT_NAME)
    composite.inputmappings = {'input': (instPE, AggregatePE.INPUT_NAME)}
    composite.outputmappings = {'output': (reducePE, AggregatePE.OUTPUT_NAME)}
    return composite

def testPipeline():
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(cons1, 'output', cons2, 'input')
    results = simple_process.process_and_return(
        graph, inputs={prod: [{}, {}, {}, {}, {}]})
    tools.eq_({cons2.id: {'output': [1, 2, 3, 4, 5]}}, results)

def testPipeline():
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, "output", cons1, "input")
    graph.connect(cons1, "output", cons2, "input")
    results = simple_process.process_and_return(graph, inputs={prod: 5})
    tools.eq_({cons2.id: {"output": list(range(1, 6))}}, results)

def testTee():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(prod, 'output', cons2, 'input')
    return graph

def testTwoPipelines():
    graph = WorkflowGraph()
    prod1 = TestProducer()
    cons1 = TestOneInOneOut()
    prod2 = TestProducer()
    cons2 = TestOneInOneOut()
    graph.connect(prod1, 'output', cons1, 'input')
    graph.connect(prod2, 'output', cons2, 'input')
    multiprocess(graph, 2, [{}, {}, {}, {}, {}])

def testTee():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(prod, 'output', cons2, 'input')
    args.num = 3
    process(graph, inputs={prod: [{}, {}, {}, {}, {}]}, args=args)

def testTee():
    graph = WorkflowGraph()
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(prod, 'output', cons2, 'input')
    results = simple_process.process(graph, {prod: [{}, {}, {}, {}, {}]})
    tools.eq_({(cons1.id, 'output'): [1, 2, 3, 4, 5],
               (cons2.id, 'output'): [1, 2, 3, 4, 5]}, results)

def testPipelineWithInputId():
    graph = WorkflowGraph()
    first = TestOneInOneOut()
    prev = first
    for i in range(5):
        cons = TestOneInOneOut()
        graph.connect(prev, 'output', cons, 'input')
        prev = cons
    results = simple_process.process(graph, {first.id: [{'input': 1}]})
    tools.eq_({(prev.id, 'output'): [1]}, results)

def testPipelineWithInputId():
    graph = WorkflowGraph()
    first = TestOneInOneOut()
    prev = first
    for i in range(5):
        cons = TestOneInOneOut()
        graph.connect(prev, 'output', cons, 'input')
        prev = cons
    results = simple_process.process(graph, {first.id: [{'input': 1}]})
    tools.eq_({(prev.id, 'output'): [1]}, results)

def testPipeline():
    graph = WorkflowGraph()
    prod = TestProducer()
    prev = prod
    for i in range(5):
        cons = TestOneInOneOut()
        graph.connect(prev, 'output', cons, 'input')
        prev = cons
    results = simple_process.process(graph, {prod: [{}, {}, {}, {}, {}]})
    tools.eq_({(prev.id, 'output'): [1, 2, 3, 4, 5]}, results)

def test_pipeline():
    prod = t.TestProducer()
    cons1 = t.TestOneInOneOut()
    cons2 = t.TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(cons1, 'output', cons2, 'input')
    process(graph, {prod: [{}, {}, {}]})

def InitiateNewRun(
        graph,
        provRecorderClass,
        provImpClass=ProvenancePE,
        input=[],
        username=None,
        workflowId=None,
        description="",
        system_id=None,
        workflowName=None,
        w3c_prov=False,
        runId=None,
        clustersRecorders={},
        feedbackPEs=[]):
    if username is None or workflowId is None or workflowName is None:
        raise Exception("Missing values")
    if runId is None:
        runId = getUniqueId()

    newrun = NewWorkflowRun()
    newrun.parameters = {"input": input,
                         "username": username,
                         "workflowId": workflowId,
                         "description": description,
                         "system_id": system_id,
                         "workflowName": workflowName,
                         "runId": runId,
                         "mapping": sys.argv[1]}

    _graph = WorkflowGraph()
    provrec0 = provRecorderClass(toW3C=w3c_prov)
    _graph.connect(newrun, "output", provrec0, provrec0.INPUT_NAME)
    # attachProvenanceRecorderPE(_graph, provRecorderClass, runId, username, w3c_prov)
    # newrun.provon = True
    simple_process.process(_graph, {'NewWorkflowRun': [{'input': 'None'}]})

    injectProv(graph, provImpClass)
    print("PREPARING PROVENANCE RECORDERS:")
    print("Provenance Recorders Clusters: " + str(clustersRecorders))
    print("PEs processing Recorders feedback: " + str(feedbackPEs))
    attachProvenanceRecorderPE(
        graph,
        provRecorderClass,
        runId,
        username,
        w3c_prov,
        clustersRecorders,
        feedbackPEs)
    return runId

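# Usage sketch for InitiateNewRun. Assumptions: my_graph is a WorkflowGraph
# built elsewhere and MyRecorderPE is a hypothetical recorder class; note that
# the function reads the mapping name from sys.argv[1].
run_id = InitiateNewRun(
    my_graph,
    MyRecorderPE,
    username='user1',
    workflowId='wf-001',
    workflowName='correlation_demo')
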
def testPipeline():
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(cons1, 'output', cons2, 'input')
    args = argparse.Namespace()
    args.num = 5
    args.simple = False
    process(graph, inputs={prod: [{}, {}, {}]}, args=args)

def parallelStdDev(index=0):
    '''
    Creates a STDDEV composite PE that can be parallelised using a
    map-reduce pattern.
    '''
    composite = WorkflowGraph()
    parStdDev = StdDevPE(index)
    reduceStdDev = StdDevReducePE()
    composite.connect(parStdDev, parStdDev.OUTPUT_NAME,
                      reduceStdDev, reduceStdDev.INPUT_NAME)
    composite.inputmappings = {'input': (parStdDev, parStdDev.INPUT_NAME)}
    composite.outputmappings = {'output': (reduceStdDev, reduceStdDev.OUTPUT_NAME)}
    return composite

def parallelAvg(index=0):
    '''
    Creates an AVG composite PE that can be parallelised using a
    map-reduce pattern.
    '''
    composite = WorkflowGraph()
    parAvg = AverageParallelPE(index)
    reduceAvg = AverageReducePE()
    composite.connect(parAvg, parAvg.OUTPUT_NAME,
                      reduceAvg, reduceAvg.INPUT_NAME)
    composite.inputmappings = {'input': (parAvg, parAvg.INPUT_NAME)}
    composite.outputmappings = {'output': (reduceAvg, reduceAvg.OUTPUT_NAME)}
    return composite

def parallelStdDev(index=0):
    composite = WorkflowGraph()
    parStdDev = StdDevPE(index)
    reduceStdDev = StdDevReducePE()
    composite.connect(parStdDev, parStdDev.OUTPUT_NAME,
                      reduceStdDev, reduceStdDev.INPUT_NAME)
    composite.inputmappings = {'input': (parStdDev, parStdDev.INPUT_NAME)}
    composite.outputmappings = {'output': (reduceStdDev, reduceStdDev.OUTPUT_NAME)}
    return composite

def testSquare():
    graph = WorkflowGraph()
    prod = TestProducer(2)
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOutWriter()
    last = TestTwoInOneOut()
    graph.connect(prod, 'output0', cons1, 'input')
    graph.connect(prod, 'output1', cons2, 'input')
    graph.connect(cons1, 'output', last, 'input0')
    graph.connect(cons2, 'output', last, 'input1')
    return graph

def testPipeline():
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(cons1, 'output', cons2, 'input')
    args = argparse.Namespace()
    args.num = 5
    args.simple = False
    process(graph, inputs={prod: [{}, {}, {}]}, args=args)

def parallelAvg(index=0):
    '''
    Creates an AVG composite PE that can be parallelised using a
    map-reduce pattern.
    '''
    composite = WorkflowGraph()
    parAvg = AverageParallelPE(index)
    reduceAvg = AverageReducePE()
    composite.connect(parAvg, parAvg.OUTPUT_NAME,
                      reduceAvg, reduceAvg.INPUT_NAME)
    composite.inputmappings = {'input': (parAvg, parAvg.INPUT_NAME)}
    composite.outputmappings = {'output': (reduceAvg, reduceAvg.OUTPUT_NAME)}
    return composite

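# Usage sketch wiring the composite average PE into a graph. Assumption:
# NumberProducer, as used in the graphs above, emits the values to be
# averaged; index=0 selects the first element of each input tuple.
prod = NumberProducer(1000)
avg = parallelAvg()
graph = WorkflowGraph()
graph.connect(prod, 'output', avg, 'input')
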
def testAlltoOne():
    graph = WorkflowGraph()
    prod = t.TestProducer()
    cons1 = t.TestOneInOneOut()
    cons2 = t.TestOneInOneOut()
    cons1.numprocesses = 5
    cons2.numprocesses = 5
    graph.connect(prod, 'output', cons1, 'input')
    # 'global' grouping routes all of cons1's output to a single cons2 instance
    cons2.inputconnections['input']['grouping'] = 'global'
    graph.connect(cons1, 'output', cons2, 'input')
    return graph

def testSquare():
    graph = WorkflowGraph()
    prod = TestProducer(2)
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    last = TestTwoInOneOut()
    graph.connect(prod, 'output0', cons1, 'input')
    graph.connect(prod, 'output1', cons2, 'input')
    graph.connect(cons1, 'output', last, 'input0')
    graph.connect(cons2, 'output', last, 'input1')
    results = simple_process.process_and_return(graph, {prod: [{}]})
    tools.eq_({last.id: {'output': ['1', '1']}}, results)

def testSquare():
    graph = WorkflowGraph()
    prod = TestProducer(2)
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    last = TestTwoInOneOut()
    graph.connect(prod, 'output0', cons1, 'input')
    graph.connect(prod, 'output1', cons2, 'input')
    graph.connect(cons1, 'output', last, 'input0')
    graph.connect(cons2, 'output', last, 'input1')
    args.num = 4
    process(graph, inputs={prod: [{}]}, args=args)

def testNotEnoughProcesses():
    prod = TestProducer()
    cons1 = TestOneInOneOut()
    cons2 = TestOneInOneOut()
    graph = WorkflowGraph()
    graph.connect(prod, 'output', cons1, 'input')
    graph.connect(cons1, 'output', cons2, 'input')
    args = argparse.Namespace()
    args.num = 1
    args.simple = False
    args.results = True
    message = process(graph, inputs={prod: 5}, args=args)
    tools.ok_('Not enough processes' in message)