def package_spark(source_dir: str, target_file: str) -> int:
    """Package an app directory and its pip dependencies into a single file.

    The contents of ``source_dir`` are copied into a temporary staging
    directory, ``_resolve_pip_dependencies`` is run on that copy, and the
    staging directory is then handed to ``_create_zip`` to produce
    ``target_file``.

    Args:
        source_dir: Directory holding the app; it must contain a
            ``requirements.txt`` file.
        target_file: Path of the package to create.

    Returns:
        0 on success; 1 if ``source_dir`` is not a directory, if
        ``requirements.txt`` is missing, or if dependency resolution
        reports a non-zero status.
    """
    if not os.path.isdir(source_dir):
        eprint(source_dir + " is not a valid directory")
        return 1

    req_file = os.path.join(source_dir, "requirements.txt")
    if not os.path.exists(req_file):
        eprint("requirements.txt not found in " + source_dir)
        return 1

    # The staging directory is only needed until the package is written, so
    # let the context manager remove it on every exit path (success, pip
    # failure, or an unexpected exception) instead of leaking it on disk.
    with tempfile.TemporaryDirectory() as tmp_dir:
        logging.debug("copying app files to " + tmp_dir)
        copy_tree(source_dir, tmp_dir)

        logging.debug("resolving pip dependencies into " + tmp_dir)
        pip_out = _resolve_pip_dependencies(tmp_dir)
        if pip_out != 0:
            eprint("there was an error processing " + req_file)
            return 1

        logging.debug("creating package " + target_file)
        _create_zip(tmp_dir, target_file)

    print("spark app generated successfully: " + os.path.abspath(target_file))
    return 0
def transform(runner: Optional[str],
              stream_dtc_file: Optional[str],
              window_dtc_file: Optional[str],
              data_processor: Optional[str],
              raw_json_files: List[str]) -> int:
    """Validate the transform options and dispatch to the selected runner.

    Args:
        runner: Key into ``RUNNER_CLASS``; a falsy value selects ``'local'``.
        stream_dtc_file: Streaming DTC file. When both DTC files are ``None``
            they are looked up via ``get_stream_window_dtc_files`` applied to
            the valid yml files returned by ``get_yml_files``.
        window_dtc_file: Window DTC file; may remain ``None``.
        data_processor: Key into ``DATA_PROCESSOR_CLASS``; a falsy value
            selects ``'simple'``.
        raw_json_files: Input files passed through unchanged to the runner.

    Returns:
        1 if no streaming DTC file is available or if the runner or
        data-processor name is unknown; otherwise the value returned by
        ``transform_local`` or ``transform_spark``.
    """
    # Discovery only happens when the caller supplied neither file.
    if stream_dtc_file is None and window_dtc_file is None:
        stream_dtc_file, window_dtc_file = get_stream_window_dtc_files(
            get_valid_yml_files(get_yml_files()))

    if stream_dtc_file is None:
        eprint('Streaming DTC file not provided and could not be found in '
               'the current directory.')
        return 1

    if not runner:
        runner = 'local'

    if not data_processor:
        data_processor = 'simple'

    if runner not in RUNNER_CLASS:
        eprint('Unknown runner: \'{}\'. Possible values: {}'.format(
            runner, list(RUNNER_CLASS.keys())))
        return 1

    if data_processor not in DATA_PROCESSOR_CLASS:
        # Report the offending data-processor name (not the runner name).
        eprint('Unknown data-processor: \'{}\'. Possible values: {}'.format(
            data_processor, list(DATA_PROCESSOR_CLASS.keys())))
        return 1

    data_processor_obj = DATA_PROCESSOR_CLASS[data_processor]()

    # Every runner other than 'local' is executed through the spark path.
    if runner == 'local':
        return transform_local(stream_dtc_file, window_dtc_file,
                               raw_json_files, data_processor_obj)
    return transform_spark(stream_dtc_file, window_dtc_file,
                           raw_json_files, data_processor_obj)
def validate_file(bts_file: str) -> int:
    """Load a YAML document from ``bts_file`` and run ``validate`` on it.

    Progress is printed to stdout; failures are reported through ``eprint``
    (and, for unexpected errors, ``logging.error``) rather than raised.

    Args:
        bts_file: Path of the UTF-8 encoded YAML file to check.

    Returns:
        0 if the document loads and ``validate`` completes without raising;
        1 on a YAML parse error, a ``SchemaError``, or any other exception
        (including failure to open the file).
    """
    print('Running validation on {}'.format(bts_file))
    try:
        # Use a context manager so the file handle is closed even when
        # parsing or validation raises.
        with open(bts_file, 'r', encoding='utf-8') as bts_stream:
            bts_dict = yaml.safe_load(bts_stream)
        validate(bts_dict)
        print('Document is valid')
        return 0
    except yaml.YAMLError as err:
        eprint('Invalid yaml')
        eprint(str(err))
        return 1
    except SchemaError as err:
        eprint(str(err))
        return 1
    except Exception as err:
        # Top-level boundary: log the error and convert it to an exit code.
        logging.error(err)
        eprint('There was an error parsing the document. Error:\n' + str(err))
        return 1