def run_pipeline(self):
    """Run the three pipeline stages in order: ingest, transform, persist.

    Prints a start banner, then for each stage builds the stage object
    with no arguments and calls its entry method. No data is passed
    between the stages in this version, and nothing is returned.
    """
    print("Running Pipeline")

    # Each stage object is created immediately before its method runs,
    # so construction and execution stay interleaved stage by stage.
    ingest.Ingest().ingest_data()
    transform.Transform().transform_data()
    persist.Persist().persist_data()
def run_pipeline(self):
    """Run ingest -> transform -> persist, handing the data between stages.

    Every stage object is built with ``self.spark``
    (presumably a SparkSession -- confirm against the class constructor).
    The result of ``ingest_data()`` is fed to ``transform_data()``, whose
    result is fed to ``persist_data()``. ``show()`` is called on both
    intermediate results before they are handed on. Returns ``None``.
    """
    print("Running Pipeline")

    # Stage 1: load the source data and display it.
    source_df = ingest.Ingest(self.spark).ingest_data()
    source_df.show()

    # Stage 2: transform the ingested data and display the outcome.
    result_df = transform.Transform(self.spark).transform_data(source_df)
    result_df.show()

    # Stage 3: hand the transformed data to the persistence stage.
    persist.Persist(self.spark).persist_data(result_df)
def run_pipeline(self):
    """Run ingest -> transform -> persist, logging the start and the end.

    Every stage object is built with ``self.spark``
    (presumably a SparkSession -- confirm against the class constructor).
    The result of ``ingest_data()`` is fed to ``transform_data()``, whose
    result is fed to ``persist_data()``. ``show()`` is called on both
    intermediate results before they are handed on. Returns ``None``.
    """
    logging.info('run_pipeline method started')

    # Stage 1: load the source data and display it.
    source_df = ingest.Ingest(self.spark).ingest_data()
    source_df.show()

    # Stage 2: transform the ingested data and display the outcome.
    result_df = transform.Transform(self.spark).transform_data(source_df)
    result_df.show()

    # Stage 3: hand the transformed data to the persistence stage.
    persist.Persist(self.spark).persist_data(result_df)

    logging.info('run_pipeline method ended')
def run_pipeline(self):
    """Run ingest -> transform -> persist and exit the process on failure.

    Every stage object is built with ``self.spark``
    (presumably a SparkSession -- confirm against the class constructor).
    The result of ``ingest_data()`` is fed to ``transform_data()``, whose
    result is fed to ``persist_data()``. ``show()`` is called on both
    intermediate results before they are handed on.

    Returns:
        None when all three stages complete.

    Raises:
        SystemExit: with status 1 when any stage raises an ``Exception``;
            the error is logged (with traceback) before exiting.
    """
    try:
        logging.info('run_pipeline method started')

        ingest_process = ingest.Ingest(self.spark)
        df = ingest_process.ingest_data()
        df.show()

        transform_process = transform.Transform(self.spark)
        transformed_df = transform_process.transform_data(df)
        transformed_df.show()

        persist_process = persist.Persist(self.spark)
        persist_process.persist_data(transformed_df)

        logging.info('run_pipeline method ended')
    except Exception as exp:
        # Deliberately broad: this is the pipeline's top-level boundary.
        # exc_info=True keeps the traceback in the log; without it only
        # the exception text survives and the failing stage is unknown.
        logging.error(
            "An error occured while running the pipeline > %s",
            exp,
            exc_info=True,
        )
        # TODO: send email notification
        # TODO: log error to database
        sys.exit(1)
err = args.get_invalid_modules_in_args() if len(err) > 0: print("ERR - Unknow module(s) : " + ','.join(err)) sys.exit() # conf.yml, conf.d/*.yml cmt.CONF = conf.load_conf() # if cron mode, introduce a small pause (offset) to spread the load on metrology servers if cmt.ARGS['cron']: mypause = conf.get_startoffset() time.sleep(mypause) # Persist cmt.PERSIST = persist.Persist(file=cmt.DEFAULT_PERSIST_FILE) if cmt.ARGS["nopersist"]: cmt.PERSIST.dict = {} lastrun = cmt.PERSIST.get_key("cmt_last_run", 0) # remote conf (url) or cached conf conf.load_conf_remote(cmt.CONF) # check master switch / CMT disabled ? ts_global_enable = cmt.CONF['global'].get('enable', 'no') if not conf.is_timeswitch_on(ts_global_enable): logit("CMT globally disabled by conf") sys.exit() # CLI : check config option ? if cmt.ARGS["checkconfig"]: