def test_init(self):
    '''
    cli.init should write a project file whose steps match those of a
    Dataflow built from the same name/description and an empty list.
    '''
    cli.init(dir_path, 'ut', 'desc')
    # Read-only access is enough here, and read() returns the text verbatim;
    # joining readlines() with a newline would double every line break.
    with open(file_name, 'r') as f:
        yaml = f.read()
    flow = dataflow.from_yaml(yaml)
    flow2 = Dataflow.from_python_objects('ut', 'desc', [])
    self.assertEqual(flow._steps, flow2._steps)
def add_source_step(dir: str, name: str, uri: str, format_type: str):
    '''
    Add a source step to the Dataforj project in `dir` and rewrite its
    dataforj.yaml in place with the updated flow.

    :param dir: directory that contains dataforj.yaml
    :param name: forwarded to api.add_source_step
    :param uri: forwarded to api.add_source_step
    :param format_type: forwarded to api.add_source_step
    :raises Exception: if `dir` has no dataforj.yaml
    '''
    file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(file_name):
        raise Exception('There is no Dataforj project in this directory')

    # 'r+' because the same handle is used to overwrite the file below.
    with open(file_name, 'r+') as f:
        # read() keeps the text verbatim; joining readlines() with a newline
        # would double every line break before the YAML is parsed.
        yaml = f.read()
        flow = dataflow.from_yaml(yaml)
        updated_flow = api.add_source_step(flow, name, uri, format_type)
        # Rewind, overwrite, then drop any leftover tail of the old content.
        f.seek(0)
        f.write(updated_flow.to_yaml())
        f.truncate()
def debug_step(dir: str, env_name: str, step: str):
    '''
    Debug a single step of the Dataforj project stored in `dir`.

    The project's dataforj.yaml is read, its format placeholders are filled
    from the environment's 'dataflow-config' mapping, and the resulting flow
    is asked to debug `step`.

    :param dir: directory that contains dataforj.yaml
    :param env_name: environment name passed to DataforjEnv
    :param step: forwarded to flow.debug_step
    :raises Exception: if `dir` has no dataforj.yaml
    '''
    project_file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(project_file_name):
        # Single-line message: a backslash continuation inside the literal
        # leaked stray characters into the text shown to the user.
        raise Exception(
            f'There is no Dataforj project in the directory [{dir}]')

    # The file is only read here, so do not demand write access with 'r+'.
    with open(project_file_name, 'r') as project_file:
        # read() keeps the text verbatim; joining readlines() with a newline
        # would double every line break.
        project_yaml = project_file.read()

    # NOTE(review): the first argument is the literal string 'flow.name',
    # not the name of the loaded flow - confirm this is intended.
    env = DataforjEnv('flow.name', env_name)
    env_vars = env.env_config['dataflow-config']
    yaml_plus_vars = project_yaml.format_map(env_vars)
    flow = dataflow.from_yaml(yaml_plus_vars)
    flow.debug_step(env, step)
def add_pyspark_step(dir: str, name: str, depends_on: list,
                     pyspark_file_path: str):
    '''
    Add a PySpark step to the Dataforj project in `dir` and rewrite its
    dataforj.yaml in place with the updated flow.

    :param dir: directory that contains dataforj.yaml
    :param name: forwarded to api.add_pyspark_step
    :param depends_on: forwarded to api.add_pyspark_step
    :param pyspark_file_path: forwarded to api.add_pyspark_step
    :raises Exception: if `dir` has no dataforj.yaml
    '''
    file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(file_name):
        raise Exception('There is no Dataforj project in this directory')

    # 'r+' because the same handle is used to overwrite the file below.
    with open(file_name, 'r+') as f:
        # read() keeps the text verbatim; joining readlines() with a newline
        # would double every line break before the YAML is parsed.
        yaml = f.read()
        flow = dataflow.from_yaml(yaml)
        updated_flow = api.add_pyspark_step(flow, name, depends_on,
                                            pyspark_file_path)
        # Rewind, overwrite, then drop any leftover tail of the old content.
        f.seek(0)
        f.write(updated_flow.to_yaml())
        f.truncate()
def open_flow(dir: str, env_name: str):
    '''
    Open the Dataforj project in the directory provided.

    The project's dataforj.yaml is read, its format placeholders are filled
    from the environment's 'dataflow-config' mapping, and the text is parsed
    with dataflow.from_yaml.

    :param dir: directory that contains dataforj.yaml
    :param env_name: environment name passed to DataforjEnv
    :return: the flow returned by dataflow.from_yaml
    :raises Exception: if `dir` has no dataforj.yaml
    '''
    file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(file_name):
        raise Exception('There is no Dataforj project in this directory')

    # The file is only read here, so do not demand write access with 'r+'.
    with open(file_name, 'r') as f:
        # read() keeps the text verbatim; joining readlines() with a newline
        # would double every line break.
        yaml = f.read()

    # NOTE(review): the first argument is the literal string 'flow.name',
    # not the name of the loaded flow - confirm this is intended.
    env = DataforjEnv('flow.name', env_name)
    env_vars = env.env_config['dataflow-config']
    yaml_plus_vars = yaml.format_map(env_vars)
    return dataflow.from_yaml(yaml_plus_vars)
def test_from_yaml(self):
    '''
    Parsing simple_yaml_text and serialising the result should give the
    same YAML as serialising flow_simple.
    '''
    parsed = dataflow.from_yaml(simple_yaml_text)
    actual_yaml = parsed.to_yaml()
    expected_yaml = flow_simple.to_yaml()
    self.assertEqual(actual_yaml, expected_yaml)