def run(polling_interval=None, source_url=None, batch_size=None, format_json=None,
        propagate_error=None, check_path=None, **_):
    """
    Poll `source_url` in an endless loop and feed successful fetches to the processor.

    The processor object is created once with
    `process(source_url, propagate_error, format_json, batch_size)`; every
    fetch whose status starts with "ok" is handed to it as a
    `(code, headers, data)` tuple through `.send()`.

    polling_interval -- value passed to `sleep()` at the end of every iteration
                        (logged as seconds)
    source_url       -- passed to `fetch()` and to `process()`
    batch_size       -- passed through to `process()`
    format_json      -- passed through to `process()`
    propagate_error  -- passed through to `process()`; when true, a failed fetch
                        also emits an error record through `stdout()`
    check_path       -- when not None, fetching only happens in iterations where
                        `os.path.exists(check_path)` is true
    **_              -- any other keyword argument is accepted and ignored

    The loop ends when the parent pid reported by `os.getppid()` changes.
    """
    ## local import: only needed to escape the values of the error record
    import json

    proc = process(source_url, propagate_error, format_json, batch_size)

    if check_path is not None:
        ct = check_transition()

    ppid = os.getppid()
    logging.info("Process pid: %s", os.getpid())
    logging.info("Parent pid: %s", ppid)
    logging.info("Starting loop...")

    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            ## `except Exception` (not a bare except) so that Ctrl-C / SystemExit
            ## are not mistaken for "path does not exist"
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            maybe_tr, _unused = ct.send(exists)
            if maybe_tr == "tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr == "tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists = True

        if exists:
            #########################################################
            status, (code, headers, data) = fetch(source_url)
            if status.startswith("ok"):
                proc.send((code, headers, data))
            elif propagate_error:
                ## json.dumps() quotes *and escapes* each value: the record stays
                ## valid JSON even when the url or the code contains quotes or
                ## backslashes, and is unchanged for plain values
                stdout('{"status":"error", "kind":"fetch", "source_url":%s, "http_code":%s}' % (
                    json.dumps("%s" % (source_url,)),
                    json.dumps("%s" % (code,)),
                ))
            #########################################################

        logging.debug("...sleeping for %s seconds", polling_interval)
        sleep(polling_interval)
def run(source_path=None, move_path=None, check_path=None, batch_size=5,
        polling_interval=None, enable_delete=False, **_):
    """
    Repeatedly list the root files of `source_path` and process them in batches.

    source_path      -- directory to list; resolved through `resolve_path()`
    move_path        -- optional directory; when given it is resolved, created
                        through `mkdir_p()` and checked with `can_write()`, and
                        each file is processed with a destination of
                        `move_path/<basename>`
    check_path       -- when not None, files are only processed in iterations
                        where `os.path.exists(check_path)` is true
    batch_size       -- maximum number of files handled per iteration
    polling_interval -- value passed to `sleep()` at the end of every iteration
                        (logged as seconds)
    enable_delete    -- passed through to `process()`; cannot be combined with
                        `move_path`
    **_              -- any other keyword argument is accepted and ignored

    Raises Exception when `move_path` and `enable_delete` are both set, when a
    path cannot be resolved, or when the 'move' directory cannot be created or
    written to.

    The loop ends when the parent pid reported by `os.getppid()` changes.
    """
    if check_path is not None:
        ct = check_transition()

    if enable_delete and move_path is not None:
        raise Exception("Options '-mp' and '-d' are mutually exclusive")

    code, rp = resolve_path(source_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve source path '%s'" % source_path)
    source_path = rp

    if move_path is not None:
        code, rp = resolve_path(move_path)
        if not code.startswith("ok"):
            raise Exception("can't resolve 'move_path' '%s'" % move_path)
        move_path = rp

        logging.info("Creating (if necessary) 'move' path: %s", move_path)
        code, msg = mkdir_p(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't create move path '%s': %s" % (move_path, str(msg)))

        logging.info("Checking if 'move' directory is writable")
        code, msg = can_write(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't write to 'move' directory")

    ## files that already failed once; a set keeps the membership test cheap
    to_skip = set()

    logging.info("Process pid: %s", os.getpid())
    ppid = os.getppid()
    logging.info("Parent pid : %s", ppid)
    logging.info("Starting loop...")

    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            ## `except Exception` (not a bare except) so that Ctrl-C / SystemExit
            ## are not mistaken for "path does not exist"
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            maybe_tr, _unused = ct.send(exists)
            if maybe_tr == "tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr == "tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists = True

        if exists:
            code, files = get_root_files(source_path)
            if not code.startswith("ok"):
                logging.error("Can't get root files from %s", source_path)
            else:
                ###############################################################
                ## Drop the skipped files *before* cutting the batch: otherwise
                ## `batch_size` skipped files at the head of the listing would
                ## fill every batch and no other file would ever be processed.
                batch = [f for f in files if f not in to_skip][:batch_size]
                try:
                    for src_file in batch:
                        code, _unused = can_write(src_file)
                        if not code.startswith("ok"):
                            to_skip.add(src_file)
                            logging.error("Would not be able to move/delete source file '%s'... skipping streaming", src_file)
                            continue

                        dst_file = None
                        if move_path is not None:
                            bn = os.path.basename(src_file)
                            dst_file = os.path.join(move_path, bn)

                        code, maybe_error = process(src_file, dst_file, enable_delete)
                        if not code.startswith("ok"):
                            to_skip.add(src_file)
                            logging.warning("Problem processing file '%s': %s", src_file, maybe_error)
                except BrokenPipe:
                    raise
                except KeyboardInterrupt:
                    raise
                except Exception as e:
                    ## any other failure ends the current batch; the loop goes on
                    logging.error("processing file '%s': %s", src_file, str(e))
                ###############################################################

        logging.debug("...sleeping for %s seconds", polling_interval)
        sleep(polling_interval)
def run(primary_path=None, compare_path=None, dest_path=None, status_filename=None,
        check_path=None, just_basename=None, topic_name=None, exts=None,
        wait_status=None, polling_interval=None, just_zppp=None, just_ppzp=None,
        just_com=None, **_):
    """
    Prepare the primary / compare / destination paths, then poll forever.

    Every iteration obtains a status through `check_if_ok(status_path, default="ok")`
    and hands it, together with the context dictionary and the paths, to
    `maybe_process()`.

    primary_path     -- resolved through `resolve_path()`, then created with `mkdir_p()`
    compare_path     -- resolved through `resolve_path()`, then created with `mkdir_p()`
    dest_path        -- optional; when truthy it is resolved and created as well
    status_filename  -- joined to `primary_path` to build the status file path
                        (only used when `wait_status` is true)
    check_path       -- when not None, processing only happens in iterations where
                        `os.path.exists(check_path)` is true
    just_basename    -- passed through to `maybe_process()`
    topic_name, exts -- stored in the context dictionary
    wait_status      -- when false, the status path is None
    polling_interval -- value passed to `sleep()` at the end of every iteration
                        (logged as seconds)
    just_zppp, just_ppzp, just_com
                     -- stored in the context dictionary; their logical "or" is
                        stored under "just_list"
    **_              -- any other keyword argument is accepted and ignored

    Raises Exception when a path cannot be resolved or when the destination
    path cannot be created.

    The loop ends when the parent pid reported by `os.getppid()` changes.
    """
    if check_path is not None:
        ct = check_transition()

    ## NOTE: each resolved path goes to a temporary first, so that the error
    ## message reports the path given by the caller and not the failed result.
    if dest_path:
        code, resolved = resolve_path(dest_path)
        if not code.startswith("ok"):
            raise Exception("can't resolve destination path '%s'" % dest_path)
        dest_path = resolved

        logging.info("Creating (if necessary) destination path: %s", dest_path)
        code, msg = mkdir_p(dest_path)
        if not code.startswith("ok"):
            raise Exception("Can't create path: %s" % dest_path)

    code, resolved = resolve_path(primary_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve primary path '%s'" % primary_path)
    primary_path = resolved

    logging.info("Creating (if necessary) primary path: %s", primary_path)
    mkdir_p(primary_path)   ## best-effort: the result is not checked

    code, resolved = resolve_path(compare_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve compare path '%s'" % compare_path)
    compare_path = resolved

    logging.info("Creating (if necessary) compare path: %s", compare_path)
    mkdir_p(compare_path)   ## best-effort: the result is not checked

    if wait_status:
        status_path = os.path.join(primary_path, status_filename)
        logging.info("Using status file path: %s", status_path)
    else:
        status_path = None

    ### context for logging etc.
    ctx = {
        "just_zppp": just_zppp,
        "just_ppzp": just_ppzp,
        "just_com": just_com,
        "just_list": just_zppp or just_ppzp or just_com,
        "pp": primary_path,
        "zp": compare_path,
        "sp": status_path,
        "pp_log": {
            "up": partial(ilog, primary_path),
            "down": partial(wlog, primary_path),
        },
        "zp_log": {
            "up": partial(ilog, compare_path),
            "down": partial(wlog, compare_path),
        },
        "topic_name": topic_name,
        "exts": exts,
    }
    ctx["tm"] = transition_manager(ctx)

    ppid = os.getppid()
    logging.info("Process pid: %s", os.getpid())
    logging.info("Parent pid: %s", ppid)
    logging.info("Starting loop...")

    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            ## `except Exception` (not a bare except) so that Ctrl-C / SystemExit
            ## are not mistaken for "path does not exist"
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            maybe_tr, _unused = ct.send(exists)
            if maybe_tr == "tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr == "tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists = True

        if exists:
            code, msg = check_if_ok(status_path, default="ok")
            maybe_process(ctx, code, msg, primary_path, compare_path, just_basename, dest_path)

        logging.debug("...sleeping for %s seconds", polling_interval)
        sleep(polling_interval)
def _wait_relaying_stdin(max_wait):
    """
    Wait for at most `max_wait` seconds, copying stdin lines to stdout meanwhile.

    max_wait -- number of seconds; None means "no time-out": block until stdin
                is readable, relay a single line and return
    """
    if max_wait is None:
        ready, _w, _e = select.select([sys.stdin], [], [])
        if ready:
            sys.stdout.write(sys.stdin.readline())
        return

    deadline = time.time() + max_wait
    while True:
        ## only wait for what is left of the interval, so that the total wait
        ## does not overshoot `max_wait` by up to another full interval
        remaining = max(0.0, deadline - time.time())
        ready, _w, _e = select.select([sys.stdin], [], [], remaining)
        if ready:
            iline = sys.stdin.readline()
            if not iline:
                ## EOF: stdin would stay "readable" forever and select() would
                ## return at once, so sleep out the rest of the interval instead
                ## of spinning
                time.sleep(max(0.0, deadline - time.time()))
                break
            sys.stdout.write(iline)

        if time.time() >= deadline:
            break


def run(source_path=None, dest_path=None, check_path=None, batch_size=5,
        polling_interval=None, delete_fetch_error=False, **_):
    """
    Repeatedly list the root files of `source_path` and process them in batches,
    relaying stdin to stdout while waiting between iterations.

    source_path        -- directory listed through `get_root_files()`
    dest_path          -- created through `mkdir_p()`, passed to `process()`
    check_path         -- when not None, files are only processed in iterations
                          where `os.path.exists(check_path)` is true
    batch_size         -- maximum number of files handled per iteration
    polling_interval   -- maximum wait between iterations (logged as seconds)
    delete_fetch_error -- passed through to `process()`
    **_                -- any other keyword argument is accepted and ignored

    Raises Exception when the destination path cannot be created.

    The loop ends when the parent pid reported by `os.getppid()` changes.
    """
    if check_path is not None:
        ct = check_transition()

    logging.info("Creating (if necessary) destination path: %s", dest_path)
    code, msg = mkdir_p(dest_path)
    if not code.startswith("ok"):
        raise Exception("Can't create destination path '%s': %s" % (dest_path, str(msg)))

    ## files that already failed once; a set keeps the membership test cheap
    to_skip = set()

    ppid = os.getppid()
    logging.info("Process pid: %s", os.getpid())
    logging.info("Parent pid : %s", ppid)
    logging.info("Starting loop...")

    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            ## `except Exception` (not a bare except) so that Ctrl-C / SystemExit
            ## are not mistaken for "path does not exist"
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            maybe_tr, _unused = ct.send(exists)
            if maybe_tr == "tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr == "tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists = True

        if exists:
            code, files = get_root_files(source_path)
            if not code.startswith("ok"):
                ## no `continue` here: the wait below must still run, otherwise
                ## a persistent listing failure turns this loop into a busy spin
                logging.error("Can't get root files from %s", source_path)
            else:
                ###############################################################
                ## Drop the skipped files *before* cutting the batch: otherwise
                ## `batch_size` skipped files at the head of the listing would
                ## fill every batch and no other file would ever be processed.
                batch = [f for f in files if f not in to_skip][:batch_size]
                try:
                    for src_file in batch:
                        code, _unused = can_write(src_file)
                        if not code.startswith("ok"):
                            to_skip.add(src_file)
                            logging.error("Would not be able to delete source file '%s'... skipping download", src_file)
                            continue

                        process(src_file, dest_path, delete_fetch_error)
                except BrokenPipe:
                    raise
                except Exception as e:
                    ## any other failure ends the current batch; the loop goes on
                    logging.error("processing file '%s': %s", src_file, str(e))
                ###############################################################

        logging.debug("...waiting for %s seconds (max)", polling_interval)

        ### Implement a "pass-through" for stdin --> stdout
        ### whilst also handling a maximum time-out
        _wait_relaying_stdin(polling_interval)