def test_graph_feedback_do_not_develop_sub_graph(self):
    SimulExec = dfp.DataflowEnvironment()
    SimulExec.set_call_rets('position')
    SimulExec.set_call_args('trade')
    SimulExec.add_node('Execution', execution)
    SimulExec.add_node('PositionUpdate', add)
    SimulExec.add_node('LaggedPosition', dfp.Lag(0))
    SimulExec.add_edge('Execution', 'PositionUpdate', 'trade', 'a')
    SimulExec.add_edge('LaggedPosition', 'PositionUpdate', 'b')
    SimulExec.add_edge('PositionUpdate', 'LaggedPosition')
    SimulExec.add_edge_call_args('Execution', 'trade')
    SimulExec.add_edge_call_rets('LaggedPosition', 'position')
    SimulExec.start()

    g = dfp.DataflowEnvironment()
    g.add_node('Decision', decision)
    g.add_node('SimulExec', SimulExec)
    g.add_edge('Decision', 'SimulExec', 'trade', 'trade')
    g.add_edge('SimulExec', 'Decision', 'position', 'position')
    g.set_call_args('obs')
    g.set_call_rets('position')
    g.add_edge_call_args('Decision', 'observation')
    g.add_edge_call_rets('SimulExec', 'position', 'position')

    # With develop=False the SimulExec sub-graph stays a single opaque node,
    # so the Decision <-> SimulExec feedback cycle has no Lag to break it
    # and the dataflow stalls.
    f = lambda: g.start(develop=False)
    self.assertRaises(dfpe.StalledDataflowCallError, f)
def test_graph_feedback(self):
    SimulExec = dfp.DataflowEnvironment()
    SimulExec.set_call_rets('position')
    SimulExec.set_call_args('trade')
    SimulExec.add_node('Execution', execution)
    SimulExec.add_node('PositionUpdate', add)
    SimulExec.add_node('LaggedPosition', dfp.Lag(0))
    SimulExec.add_edge('Execution', 'PositionUpdate', 'trade', 'a')
    SimulExec.add_edge('LaggedPosition', 'PositionUpdate', 'b')
    SimulExec.add_edge('PositionUpdate', 'LaggedPosition')
    SimulExec.add_edge_call_args('Execution', 'trade')
    SimulExec.add_edge_call_rets('LaggedPosition', 'position')
    SimulExec.start()

    g = dfp.DataflowEnvironment()
    g.add_node('Decision', decision)
    g.add_node('SimulExec', SimulExec)
    g.add_edge('Decision', 'SimulExec', 'trade', 'trade')
    g.add_edge('SimulExec', 'Decision', 'position', 'position')
    g.set_call_args('obs')
    g.set_call_rets('position')
    g.add_edge_call_args('Decision', 'observation')
    g.add_edge_call_rets('SimulExec', 'position')

    # develop=True inlines the SimulExec sub-graph, exposing its internal
    # Lag so the feedback cycle can be scheduled.
    g.start(develop=True)
    datas = [g(obs=10) for __ in range(10)]
    assert datas[-1].position == 89.10000000000001
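# NOTE: the graph tests in this file use module-level fixtures (`decision`,
# `execution`, `add`, `DataGenerator`, `DataAppender`) defined elsewhere in
# the suite. A minimal sketch of plausible shapes, inferred from the
# assertions above (hypothetical, not the actual fixtures):
#
#     def decision(observation, position=0):
#         return {'trade': 10}            # constant trading decision
#
#     def execution(trade):
#         return {'trade': 0.99 * trade}  # fill with 1% slippage
#
#     def add(a, b):
#         return a + b                    # position update
#
# Under these assumptions each call adds 9.9 to the position; with the
# one-step Lag(0) on the returned position, the tenth call reports nine
# accumulated fills, and 9 * 9.9 == 89.10000000000001 as asserted.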
def test_graph_lqg_feedback(self):
    """
    epsilonp variable is set in order to have coverage on the
    "self.add_edge(*edge)" part of the code.
    """
    # Make SimulExec
    SimulExec = dfp.DataflowEnvironment(name='SimulExec')
    SimulExec.set_call_rets('position')
    SimulExec.set_call_args('trade')
    SimulExec.add_node('Execution', execution)
    SimulExec.add_node('PositionUpdate', add)
    SimulExec.add_node('LaggedPosition', dfp.Lag(0))
    SimulExec.add_edge('Execution', 'PositionUpdate', 'trade', 'a')
    SimulExec.add_edge('LaggedPosition', 'PositionUpdate', 'b')
    SimulExec.add_edge('PositionUpdate', 'LaggedPosition')
    SimulExec.add_edge_call_args('Execution', 'trade')
    SimulExec.add_edge_call_rets('LaggedPosition', 'position')
    SimulExec.start()

    # Make Market Plant
    MarketPlant = dfp.DataflowEnvironment(name='MarketPlant')
    MarketPlant.set_call_rets('position,observation, epsilonp')
    MarketPlant.set_call_args('trade,observation, epsilonp')
    MarketPlant.add_edge_call_args_rets('epsilonp')
    MarketPlant.add_node('SimulExec', SimulExec)
    MarketPlant.add_edge_call_args_rets('observation')
    MarketPlant.add_edge_call_rets('SimulExec', 'position', 'position')
    MarketPlant.add_edge_call_args('SimulExec', 'trade', 'trade')
    MarketPlant.start()

    # Make final Dataflow
    def decision(epsilonp, position, observation):
        return {'trade': 10, 'epsilonp': epsilonp}

    LQG = dfp.DataflowEnvironment(name='LQG')
    LQG.add_node('Decision', decision)
    LQG.add_node('MarketPlant', MarketPlant)
    LQG.add_node('Data', DataGenerator())
    LQG.add_edge('Decision', 'MarketPlant', 'trade', 'trade')
    LQG.add_edge('MarketPlant', 'Decision')
    LQG.add_edge('Data', 'MarketPlant', 'observations', 'observation')
    LQG.set_call_args('epsilonp')
    LQG.add_edge_call_args('MarketPlant')
    LQG.set_call_rets('position, epsilonp')
    LQG.add_edge_call_rets('MarketPlant')
    LQG.start(develop=True, level=2)
    datas = [LQG(0.1) for __ in range(10)]
    assert datas[-1].position == 89.10000000000001
def test_use_graph_as_callable_automatic_edges_2_args(self):
    g = dfp.DataflowEnvironment()
    g.add_node('Decision', decision, rets='trade')
    g.add_node('Execution', execution, args='trade', rets='trade')
    g.add_node('PositionUpdate', add)
    g.add_node('LaggedPosition', dfp.Lag(0))
    g.add_edge('LaggedPosition', 'Decision', 'position')
    ###############################
    # call add_edge with 2 arguments
    ###############################
    g.add_edge('Decision', 'Execution')
    ###############################
    g.add_edge('Execution', 'PositionUpdate', 'trade', 'a')
    g.add_edge('LaggedPosition', 'PositionUpdate', 'b')
    g.add_edge('PositionUpdate', 'LaggedPosition')
    g.set_call_args('obs')
    g.set_call_rets('position')
    g.add_edge_call_args('Decision', 'obs', 'observation')
    g.add_edge_call_rets('LaggedPosition', 'position')
    g.start()
    datas = []
    for __ in range(10):
        data = g(obs=10)
        datas.append(data.position)
    assert datas[-1] == 89.10000000000001
def test_use_graph_as_callable_test_rets(self):
    g = dfp.DataflowEnvironment()
    g.add_node('Decision', decision, rets='test_trade')
    g.add_node('Execution', execution)
    f = lambda: g.add_edge('Decision', 'Execution', 'trade', 'trade')
    self.assertRaises(dfpe.WrongEdgeArgsError, f)
def test_add_edge_set_attr_and_missing_node(self):
    g = dfp.DataflowEnvironment()
    g.add_node('data_appender', DataAppender())
    g.add_node('data_generator', DataGenerator())
    f = lambda: g.add_edge('data_generator', 'wrong_node_id',
                           'observations', 'data', dict(color='red'))
    self.assertRaises(dfpe.UnknownNodeError, f)
def test_DataflowEnvironment_as_Generator_Chained_Partial(self):
    """
    The nodes are specified in the wrong order.
    The execution plan is determined in the lock() method.
    There is no data-in generator inside the environment; data arrives
    through the call arguments.

    f is a coroutine
    g is a function
    terminal is a function
    """
    attr_f = self.attr_f
    data_in = self.data_in
    data_out = self.data_out
    f_co = coroutine_from_func(f)

    dtf = dfp.DataflowEnvironment()
    dtf.add_cotask(f_co, filters='call_args', **attr_f)
    dtf.add_task(g, filters=dict(args=[f_co]))
    dtf.add_edge_call_rets(g)
    dtf.start()

    results = []
    for res in dtf.gen(data_in.gen()):
        results.append(res)
    assert results == self.results
def test_DataflowEnvironment_NotNamedTasks(self):
    """
    The nodes are specified in the wrong order.
    The execution plan is determined in the lock() method.

    datain is a coroutine made from the data_in func with coroutine_from_func
    f is a coroutine
    g is a function
    terminal is a function
    """
    attr_f = self.attr_f
    data_in = self.data_in
    data_out = self.data_out
    f_co = coroutine_from_func(f)
    in_co = coroutine_from_func(data_in.func)

    dtf = dfp.DataflowEnvironment()
    dtf.add_task(data_out.func, filters=dict(args=[g]))
    dtf.add_cotask(f_co, filters=in_co, **attr_f)
    dtf.add_task(g, filters=dict(args=[f_co]))
    dtf.add_cotask(in_co)
    dtf.start()
    dtf.run()
    assert data_out.out == self.results
def test_terminal_node_data_with_tasks_and_automatic_link(self):
    g = dfp.DataflowEnvironment()
    g.args = 'observation'
    g.add_task('Decision', decision, rets='trade',
               filters=dict(args=['LaggedPosition']))
    g.add_edge_call_args('Decision')
    g.add_task('Execution', execution, rets='trade')
    g.add_task('PositionUpdate', add, args='trade, position', rets='position',
               filters=dict(args=['Execution', 'LaggedPosition']))
    g.add_task('LaggedPosition', dfp.Lag(0), args='position', rets='position')
    g.rets = 'position'
    g.add_edge_call_rets('LaggedPosition')
    g.start()
    datas = []
    for __ in range(10):
        data = g(observation=10)
        datas.append(data.position)
    assert datas[-1] == 89.10000000000001
def test_terminal_node_data(self):
    g = dfp.DataflowEnvironment()
    g.add_node('Decision', decision, rets='trade')
    g.add_node('Execution', execution, rets='trade')
    g.add_node('PositionUpdate', add, args='position,trade', rets='position')
    g.add_node('LaggedPosition', dfp.Lag(0), args='position', rets='position')
    g.add_edge('LaggedPosition', 'Decision')
    g.add_edge('Decision', 'Execution')
    g.add_edge('Execution', 'PositionUpdate')
    g.add_edge('LaggedPosition', 'PositionUpdate')
    g.add_edge('PositionUpdate', 'LaggedPosition')
    g.set_call_args('observation')
    g.set_call_rets('position')
    g.add_edge_call_args('Decision')
    g.add_edge_call_rets('LaggedPosition')
    g.start()
    datas = []
    for __ in range(10):
        data = g(observation=10)
        datas.append(data.position)
    assert datas[-1] == 89.10000000000001
def test_DataflowEnvironment_AplyNonTrivialFilters(self):
    """
    The nodes are specified in the wrong order.
    The execution plan is determined in the lock() method.
    We apply a non-trivial filter that should not be executed during the
    execution plan determination.

    datain is a coroutine made from the data_in func with coroutine_from_func
    f is a coroutine
    g is a function
    terminal is a function
    """
    attr_f = self.attr_f
    data_in = self.data_in
    data_out = self.data_out
    f_co = coroutine_from_func(f)
    in_co = coroutine_from_func(data_in.func)
    end_func = data_out.func

    dtf = dfp.DataflowEnvironment()
    # A (source, callable) filter tuple applies the callable to the value
    # flowing along that edge.
    dtf.add_task(end_func, filters=dict(args=[(g, lambda x: -float(str(x)))]))
    dtf.add_cotask(in_co)
    dtf.add_task(g, filters=dict(args=[(f_co, lambda x: -float(str(x)))]))
    dtf.add_cotask(f_co, filters=(in_co, lambda x: -float(str(x))), **attr_f)
    dtf.start()
    dtf.run()
    assert data_out.out == [-990.0, -991.0, -992.0, -993.0, -994.0,
                            -995.0, -996.0, -997.0, -998.0, -999.0]
def test_terminal_node_data_no_names(self):
    # declare functions
    position_update = add  # lambda position, trade: position + trade
    pos_lag = dfp.Lag(0)

    g = dfp.DataflowEnvironment()
    g.args = 'observation'
    g.rets = 'position'
    g.add_node(decision)
    g.add_node(execution, rets='trade')
    g.add_node(position_update, args='position,trade')
    g.add_node(pos_lag, args='position', rets='position')
    g.add_edge(pos_lag, decision)
    g.add_edge(decision, execution)
    g.add_edge(execution, position_update)
    g.add_edge(pos_lag, position_update)
    g.add_edge(position_update, pos_lag)
    g.add_edge_call_args(decision)
    g.add_edge_call_rets(pos_lag)
    g.start()
    datas = []
    for __ in range(10):
        data = g(observation=10)
        datas.append(data.position)
    assert datas[-1] == 89.10000000000001
def test_UnlinkedOutputRunTimeError(self):
    g = dfp.DataflowEnvironment()
    g.add_node('Execution', execution)
    g.set_call_args('obs')
    g.add_edge_call_args('Execution')
    g.start()
    assert g(10) is None
def test_raise_UnlinkedInputError(self):
    g = dfp.DataflowEnvironment()
    g.add_node('Execution', execution, rets='trade')
    g.set_call_rets('obs')
    g.add_edge_call_rets('Execution')
    f = lambda: g.start()
    self.assertRaises(dfpe.UnlinkedInputError, f)
def test_multiple_in_out_edge_not_specified(self):
    class DataAppender2(object):
        def __init__(self):
            self.data = []

        def __call__(self, a, b, c):
            self.data.append([a, b, c])

    def DataGenerator2():
        return {'a': 'a_val', 'b': 'b_val', 'c': 'c_val'}

    g = dfp.DataflowEnvironment()
    data_appender = DataAppender2()
    g.add_node('data_appender', data_appender)
    g.add_node('data_generator', DataGenerator2, rets='a,b,c')
    g.add_edge('data_generator', 'data_appender')
    g.start()
    g()
    g()
    datas = data_appender.data
    # g has been called two times
    assert len(datas) == 2
    for i in [0, 1]:
        assert len(datas[i]) == 3
        a, b, c = datas[i]
        assert a == 'a_val'
        assert b == 'b_val'
        assert c == 'c_val'
def test_DataflowEnvironment_WithKwArgs(self):
    """
    The nodes are specified in the wrong order.
    The execution plan is determined in the lock() method.
    There is no data-in generator; data arrives through the call arguments.

    f is a coroutine
    g is a function
    terminal is a function
    """
    attr_f = self.attr_f
    data_in = self.data_in
    data_out = self.data_out
    f_co = coroutine_from_func(f)

    dtf = dfp.DataflowEnvironment()
    dtf.args = ['my_data']
    dtf.add_cotask(f_co, filters=('call_args', 'my_data'), **attr_f)
    dtf.add_task(g, filters=dict(args=[f_co]))
    dtf.add_task(data_out.func, filters=dict(args=[g]))
    dtf.start()
    # dtf.run()
    for i in range(10):
        dtf(my_data=i)
    assert data_out.out == self.results
def test_graph_with_no_returns_specified_call_type_is_args(self):
    class DataGenerator2(object):
        def __call__(self):
            return np.ones(10), 30, 'a'

    class DataAppender2(object):
        def __init__(self):
            self.data = []

        def __call__(self, a, b, c):
            self.data.append([a, b, c])

    g = dfp.DataflowEnvironment()
    data_appender = DataAppender2()
    g.add_node('data_generator', DataGenerator2(), rets='a,b,c')
    g.add_node('data_appender', data_appender)
    g.add_edge('data_generator', 'data_appender')
    g.start()
    g()
    g()
    datas = data_appender.data
    # g has been called two times
    assert len(datas) == 2
    for i in [0, 1]:
        assert len(datas[i]) == 3
        a, b, c = datas[i]
        np.testing.assert_equal(a, np.ones(10))
        assert b == 30
        assert c == 'a'
def test_DataflowEnvironment_ReturnData(self):
    """
    The nodes are specified in the wrong order.
    The execution plan is determined in the lock() method.

    datain is a coroutine made from the data_in func with coroutine_from_func
    f is a coroutine
    g is a function
    terminal is a function
    """
    attr_f = self.attr_f
    data_in = self.data_in
    data_out = self.data_out
    f_co = coroutine_from_func(f)

    dtf = dfp.DataflowEnvironment()
    in_co = coroutine_from_func(data_in.func)
    dtf.add_cotask(in_co)
    dtf.add_cotask(f_co, filters=in_co, **attr_f)
    dtf.add_task(g, filters=dict(args=[f_co]))
    dtf.add_edge_call_rets(g)
    dtf.start()
    # dtf.run()
    results = []
    for i in range(10):
        res = dtf()
        results.append(res)
    assert results == self.results
def test_DataflowEnvironmentAutomaticOrderInLock(self):
    """
    The nodes are specified in the wrong order.
    The execution plan is determined in the lock() method.

    datain is a coroutine made from the data_in func with coroutine_from_func
    f is a coroutine
    g is a function
    terminal is a function
    """
    attr_f = self.attr_f
    data_in = self.data_in
    data_out = self.data_out

    dtf = dfp.DataflowEnvironment()
    dtf.add_task('terminal', data_out.func, filters=dict(args=['g']))
    dtf.add_cotask('f', coroutine_from_func(f), filters='indata', **attr_f)
    dtf.add_task('g', g, filters=dict(args=['f']))
    dtf.add_cotask('indata', coroutine_from_func(data_in.func))
    dtf.start()
    dtf.run()
    assert data_out.out == self.results
def test_contains_error():
    G = dfp.DataflowEnvironment()  # or DiGraph, MultiGraph, MultiDiGraph, etc
    G.add_path([0, 1, 2, 3], [lambda x: x] * 4)
    assert 1 in G
    assert 4 not in G
    assert 'b' not in G
    assert [] not in G  # no exception for nonhashable
    assert {1: 1} not in G  # no exception for nonhashable
def test_graph_incoherent_return_for_intern_call(self):
    g = dfp.DataflowEnvironment()
    g.add_node('data_appender', DataAppender())
    g.add_node('data_generator', DataGenerator())
    g.add_edge('data_generator', 'data_appender', 'observations_wrong', 'data')
    g.start()
    f = lambda: g()
    self.assertRaises(KeyError, f)
def test_add_edge_raise_DafpyError(self):
    g = dfp.DataflowEnvironment()
    g.add_node('data_appender', DataAppender())
    g.add_node('data_generator', DataGenerator())
    f = lambda: g.add_edge('data_generator', 'data_appender',
                           'observations', 'data', attr_dict=1)
    self.assertRaises(dfpe.DafpyError, f)
def develop_graph(self, level):
    """
    Develop the nodes of the graph that are Dataflow callables
    """
    if not self._is_locked:
        raise dfpe.DataflowNotLockedError()
    if len(self.developable_nodes()) == 0:
        return
    nodes_to_develop = copy.copy(self.developable_nodes())
    developed_node = []
    while len(nodes_to_develop) > 0:
        node_to_develop = nodes_to_develop.pop(0)
        g = dfp.DataflowEnvironment(
            name=self.name + '_develop_node_{!r}'.format(node_to_develop))

        # set callable nodes which are not being developed
        for node_id, node_attr in self.iter_nodes():
            if node_id != node_to_develop:
                g.add_node(node_id, attr_dict=node_attr)
                # if node_id in self.reset_task:
                #     assert False, "check if it is correct"
                #     g.reset_task[node_id] = g.task[node_id]

        # develop the node and the developable node's internal edges
        node_id = node_to_develop
        node_attr = self.node[node_id]
        g._add_nodes_from_graph(node_id, node_attr['func'])
        g._add_edges_from_graph_internals(node_id, node_attr['func'])

        # develop the developable node's edges related to call args and rets
        for u, v, u_out, v_in, edge_attr in self.iter_edges():
            if u == node_id:
                if v == node_id:
                    g.add_edges_from_graph_in_and_out(
                        u, v, u_out, v_in, edge_attr,
                        self.node[u]['func'], self.node[v]['func'])
                else:
                    g._add_edges_from_graph_out(
                        u, v, u_out, v_in, edge_attr, self.node[u]['func'], self)
            elif v == node_id:
                g.add_edges_from_graph_in(
                    u, v, u_out, v_in, edge_attr, self.node[v]['func'], self)
            else:
                g.add_edge(u, v, u_out, v_in, attr_dict=edge_attr)

        g.lock(develop=False)  # will not develop subgraphs of g
        self.set_from(g)
        developed_node.append(node_id)
    self._check()
    if level > 1:
        self.develop_graph(level - 1)
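# Note on the `level` argument (a reading aid, not part of the library docs):
# each pass of develop_graph() inlines one layer of nested
# DataflowEnvironment nodes, and the trailing recursion
# `self.develop_graph(level - 1)` repeats this `level` times. A sketch,
# assuming the SimulExec / MarketPlant / LQG graphs built in
# test_graph_lqg_feedback above:
#
#     LQG.start(develop=True, level=2)
#     # pass 1 inlines MarketPlant into LQG;
#     # pass 2 inlines the SimulExec node that pass 1 exposed,
#     # so the Lag(0) two levels down can break the feedback cycle.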
def test_add_nodes_from():
    G = dfp.DataflowEnvironment()  # or DiGraph, MultiGraph, MultiDiGraph, etc
    G.add_nodes_from('Helo', func=lambda x: x)
    K3 = dfp.DataflowEnvironment()
    K3.add_path([0, 1, 2], func=lambda x: x)
    G.add_nodes_from(K3)
    assert sorted(G.nodes(), key=str) == [0, 1, 2, 'H', 'e', 'l', 'o']
    G.add_nodes_from([3, 4], func=lambda x: x, size=10)
    G.add_nodes_from([5, 6], func=lambda x: x, weight=0.4)
    # Use (node, attrdict) tuples to update attributes for specific nodes.
    G.add_nodes_from([(7, dict(size=11)), (8, {'color': 'blue'})],
                     func=lambda x: x)
    assert G.node[7]['size'] == 11
    H = dfp.DataflowEnvironment()
    H.add_nodes_from(G)
    assert H.node[7]['size'] == 11
def setUp(self):
    g = dfp.DataflowEnvironment(verbose=True)
    g.set_call_args('u')
    g.set_call_rets('y')
    g.add_node('f', lambda x: x)
    g.add_node('g', lambda x: x)
    g.add_edge('f', 'g', apply=lambda x: -.9 * x)
    g.add_edge_call_args('f')
    g.add_edge_call_rets('g')
    g.start()
    self.g = g
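# The fixture above wires u -> f -> (apply: x * -0.9) -> g -> y, so the
# environment computes y = -0.9 * u. A hedged usage sketch (the namedtuple
# return shape is assumed from test_add_path_common_call_attr below):
#
#     env = self.g
#     assert env(1).y == -0.9
#     assert env(10).y == -9.0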
def test_add_lag():
    lag = dfp.DataflowEnvironment()
    lag.args = 'lag_in'
    lag.add_lag('lag', None)
    lag.rets = 'lag_out'
    lag.add_edge_call_args('lag')
    lag.add_edge_call_rets('lag')
    lag.start()
    # a lag node returns the previous input, starting with its initial value
    assert lag(11).lag_out is None
    assert lag('a').lag_out == 11
    assert lag(None).lag_out == 'a'
    assert lag('b').lag_out is None
    assert lag(None).lag_out == 'b'
def test_add_path_common_call_attr():
    G = dfp.DataflowEnvironment(name='test')  # or DiGraph, MultiGraph, MultiDiGraph, etc
    G.add_path([0, 1, 2, 3], func=lambda x: x)
    assert len(G) == 4
    G.add_path([10, 11, 12], [lambda x: x] * 3, weight=7)
    assert len(G) == 7
    G.add_edge(3, 10)
    G.set_call_rets('x')
    G.set_call_args('x')
    G.add_edge_call_args(0)
    G.add_edge_call_rets(12)
    G.start()
    assert G(1) == collections.namedtuple('test', 'x')(1)
def test_DataflowEnvironment_as_Coroutine_Chained_Partial(self):
    """
    The DataflowEnvironment instance is chained with two other coroutines
    """
    attr_f = self.attr_f

    dtf = dfp.DataflowEnvironment()
    dtf.add_task(f, filters=dict(args=['call_args']), **attr_f)
    dtf.add_task(g, filters=dict(args=[f]))
    dtf.add_edge_call_rets(g)
    dtf.start()

    data_in = self.data_in
    data_out = self.data_out
    co = dtf.co(data_out.co())
    for x in data_in.gen():
        co.send(x)
    assert data_out.out == self.results
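# Taken together, the tests in this file exercise four ways of driving a
# started DataflowEnvironment `dtf` (all four appear verbatim in the tests
# above and below):
#
#     dtf(my_data=1)              # as a plain callable, one step per call
#     dtf.run()                   # self-driven, pulling from an initial task
#     for res in dtf.gen(src):    # as a generator chained onto a source
#         ...
#     co = dtf.co(sink_co)        # as a coroutine chained onto a sink
#     co.send(x)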
def test_multiple_in_one_out(self):
    class DataAppender2(object):
        def __init__(self):
            self.data = []

        def __call__(self, data_in):
            self.data.append([data_in])

    def DataGenerator2():
        return {'a': 'a_val', 'b': 'b_val', 'c': 'c_val'}

    g = dfp.DataflowEnvironment()
    data_appender_aa = DataAppender2()
    data_appender_bb = DataAppender2()
    data_appender_cc = DataAppender2()
    g.add_node('data_appender_aa', data_appender_aa)
    g.add_node('data_appender_bb', data_appender_bb)
    g.add_node('data_appender_cc', data_appender_cc)
    g.add_node('data_generator', DataGenerator2, rets='a,b,c')
    g.add_edge('data_generator', 'data_appender_aa', 'a')
    g.add_edge('data_generator', 'data_appender_bb', 'b')
    g.add_edge('data_generator', 'data_appender_cc', 'c')
    g.start()
    g()
    g()
    datas_aa = data_appender_aa.data
    datas_bb = data_appender_bb.data
    datas_cc = data_appender_cc.data
    # g has been called two times
    assert len(datas_aa) == 2
    assert len(datas_bb) == 2
    assert len(datas_cc) == 2
    for i in [0, 1]:
        assert len(datas_aa[i]) == 1
        assert len(datas_bb[i]) == 1
        assert len(datas_cc[i]) == 1
        a = datas_aa[i][0]
        b = datas_bb[i][0]
        c = datas_cc[i][0]
        assert a == 'a_val'
        assert b == 'b_val'
        assert c == 'c_val'
def test_DataflowEnvironment1(self):
    """
    datain is a generator
    f is a coroutine
    g is a function
    terminal is a function
    """
    attr_f = self.attr_f
    data_in = self.data_in
    data_out = self.data_out

    dtf = dfp.DataflowEnvironment()
    dtf.add_gentask('indata', data_in.gen, initial=True)
    dtf.add_cotask('f', coroutine_from_func(f), filters='indata', **attr_f)
    dtf.add_task('g', g, filters=dict(args=['f']))
    dtf.add_task('terminal', data_out.func, filters=dict(args=['g']))
    dtf.start()
    dtf.run()
    assert data_out.out == self.results