def run(path=None, polling_interval=None, topic=None, always=None, **_):
    """Watch `path` for subdirectories, reporting state transitions.

    Every cycle: poll the directory's subdirs, feed the resulting status
    code to the transition manager, process new subdirs on success, then
    echo stdin to stdout for up to `polling_interval` seconds.

    :param path: directory to watch (resolved via `resolve_path`)
    :param polling_interval: seconds between directory polls
    :param topic: opaque value stored in the processing context
    :param always: opaque value stored in the processing context
    :raises Exception: if `path` cannot be resolved
    """
    code, path = resolve_path(path)
    if not code.startswith("ok"):
        raise Exception("Can't resolve path...: %s" % path)

    def loginfo(path, state, *_):
        logging.info("Path state '%s': %s" % (path, state))

    ctx = {"topic": topic, "path": path, "always": always}
    ctx["_path"] = {"previous": "ok", "ch": partial(loginfo, path)}
    tm = transition_manager(ctx)

    ppid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid : %s" % ppid)
    logging.info("Starting loop...")
    while True:
        # Exit when the parent process disappears (we get re-parented).
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break
        code, maybe_subdirs = getsubdirs(path)
        tm.send(("_path", code))
        if code.startswith("ok"):
            process(ctx, maybe_subdirs)

        # Echo stdin -> stdout for the remainder of the polling interval.
        start_time = time.time()
        while True:
            # FIX: wait only for the *remaining* time, not the full
            # interval again - the original could oversleep up to 2x.
            remaining = polling_interval - (time.time() - start_time)
            if remaining <= 0:
                break
            ir, _w, _e = select.select([sys.stdin], [], [], remaining)
            if ir:
                iline = sys.stdin.readline()
                if not iline:
                    # FIX: EOF on stdin - select() would report readiness
                    # forever, busy-spinning the CPU. Sleep out the rest
                    # of the interval instead.
                    time.sleep(remaining)
                    break
                sys.stdout.write(iline)
def run(path_source=None, polling_interval=None, ext_include=None, ext_exclude=None, batch_size=None, **_):
    """Poll a directory and emit its root files in batches.

    Files are filtered by extension using either an include-list or an
    exclude-list (mutually exclusive), then handed to `output` in
    `batch_size`-sized bunches once per polling cycle.

    :raises Exception: if both lists are given or the path is unresolvable
    """
    if ext_include is not None and ext_exclude is not None:
        raise Exception("'ee' and 'ei' options are mutually exclusive")

    # Whichever list was supplied determines the filtering mode.
    criteria = "include" if ext_include else "exclude"
    elist = ext_include or ext_exclude

    code, path_source = resolve_path(path_source)
    if not code.startswith("ok"):
        raise Exception("can't resolve path '%s'" % path_source)

    ppid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid : %s" % ppid)
    logging.info("Starting loop...")

    while True:
        # A changed parent pid means our parent died and we were adopted.
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        code, files = get_root_files(path_source)
        matched = filter_files_by_ext(criteria, elist, (code, files))
        if matched:
            for bunch in batch(matched, batch_size):
                output(path_source, bunch)

        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)
def run(source_path=None, move_path=None, check_path=None, batch_size=5, polling_interval=None, enable_delete=False, **_):
    """Stream root files from `source_path`, then move or delete them.

    Optionally gated by the existence of `check_path` (with transition
    logging via `check_transition`). Files that cannot be written to, or
    that fail processing, are remembered in `to_skip` and never retried.

    :param source_path: directory to pull files from
    :param move_path: directory processed files are moved to (mutually
        exclusive with `enable_delete`)
    :param check_path: if set, processing only happens while it exists
    :param batch_size: max files handled per cycle
    :param polling_interval: seconds to sleep between cycles
    :param enable_delete: delete source files instead of moving them
    :raises Exception: on invalid option combinations or unusable paths
    """
    if check_path is not None:
        ct = check_transition()

    if enable_delete and move_path is not None:
        raise Exception("Options '-mp' and '-d' are mutually exclusive")

    code, rp = resolve_path(source_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve source path '%s'" % source_path)
    source_path = rp

    if move_path is not None:
        code, rp = resolve_path(move_path)
        if not code.startswith("ok"):
            raise Exception("can't resolve 'move_path' '%s'" % move_path)
        move_path = rp
        logging.info("Creating (if necessary) 'move' path: %s" % move_path)
        code, msg = mkdir_p(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't create move path '%s': %s" % (move_path, str(msg)))
        logging.info("Checking if 'move' directory is writable")
        code, msg = can_write(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't write to 'move' directory")

    to_skip = []

    logging.info("Process pid: %s" % os.getpid())
    ppid = os.getppid()
    logging.info("Parent pid : %s" % ppid)
    logging.info("Starting loop...")
    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            try:
                exists = os.path.exists(check_path)
            # FIX: narrowed from bare 'except:' which also swallowed
            # KeyboardInterrupt/SystemExit.
            except Exception:
                exists = False
            maybe_tr, _ = ct.send(exists)
            # Only log on an actual transition ("tr"), not every cycle.
            if maybe_tr == "tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr == "tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists = True

        if exists:
            code, files = get_root_files(source_path)
            if not code.startswith("ok"):
                logging.error("Can't get root files from %s" % source_path)
            else:
                files = files[:batch_size]
                try:
                    for src_file in files:
                        if src_file in to_skip:
                            continue
                        code, _ = can_write(src_file)
                        if not code.startswith("ok"):
                            to_skip.append(src_file)
                            logging.error("Would not be able to move/delete source file '%s'... skipping streaming" % src_file)
                            continue
                        dst_file = None
                        if move_path is not None:
                            bn = os.path.basename(src_file)
                            dst_file = os.path.join(move_path, bn)
                        code, maybe_error = process(src_file, dst_file, enable_delete)
                        if not code.startswith("ok"):
                            to_skip.append(src_file)
                            logging.warning("Problem processing file '%s': %s" % (src_file, maybe_error))
                except BrokenPipe:
                    raise
                except KeyboardInterrupt:
                    raise
                # FIX: 'except Exception, e' is Python-2-only syntax;
                # 'as e' works on 2.6+ and 3.x.
                except Exception as e:
                    logging.error("processing file '%s': %s" % (src_file, str(e)))

        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)
def run(primary_path=None, compare_path=None, dest_path=None, status_filename=None, check_path=None, just_basename=None, topic_name=None, exts=None, wait_status=None, polling_interval=None, just_zppp=None, just_ppzp=None, just_com=None, **_):
    """Compare a primary path against a compare path once per cycle.

    Builds a logging/processing context and calls `maybe_process` each
    cycle, optionally gated on `check_path` existing and on a status
    file (when `wait_status` is set) reading "ok".

    :param primary_path: primary directory (created if missing)
    :param compare_path: comparison directory (created if missing)
    :param dest_path: optional destination directory (created if missing)
    :param status_filename: name of the status file under primary_path
    :param check_path: if set, processing only happens while it exists
    :param wait_status: if truthy, gate processing on the status file
    :raises Exception: if any path cannot be resolved or created
    """
    if check_path is not None:
        ct = check_transition()

    if dest_path:
        code, dest_path = resolve_path(dest_path)
        if not code.startswith("ok"):
            # FIX: message was garbled ("can't destination path").
            raise Exception("can't resolve destination path '%s'" % dest_path)
        logging.info("Creating (if necessary) destination path: %s" % dest_path)
        code, msg = mkdir_p(dest_path)
        # FIX: was 'code!="ok"' - use the startswith("ok") success
        # convention applied everywhere else in this file.
        if not code.startswith("ok"):
            raise Exception("Can't create path: %s" % dest_path)

    code, primary_path = resolve_path(primary_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve primary path '%s'" % primary_path)
    logging.info("Creating (if necessary) primary path: %s" % primary_path)
    # NOTE(review): mkdir_p result is intentionally left unchecked here,
    # as in the original - failures surface later in the loop.
    mkdir_p(primary_path)

    code, compare_path = resolve_path(compare_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve compare path '%s'" % compare_path)
    logging.info("Creating (if necessary) compare path: %s" % compare_path)
    mkdir_p(compare_path)

    if wait_status:
        status_path = os.path.join(primary_path, status_filename)
        logging.info("Using status file path: %s" % status_path)
    else:
        status_path = None

    ### context for logging etc.
    ctx = {
        "just_zppp": just_zppp,
        "just_ppzp": just_ppzp,
        "just_com": just_com,
        "just_list": just_zppp or just_ppzp or just_com,
        "pp": primary_path,
        "zp": compare_path,
        "sp": status_path,
        "pp_log": {"up": partial(ilog, primary_path), "down": partial(wlog, primary_path)},
        "zp_log": {"up": partial(ilog, compare_path), "down": partial(wlog, compare_path)},
        "topic_name": topic_name,
        "exts": exts,
    }
    ctx["tm"] = transition_manager(ctx)

    ppid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid: %s" % ppid)
    logging.info("Starting loop...")
    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            try:
                exists = os.path.exists(check_path)
            # FIX: narrowed from bare 'except:'.
            except Exception:
                exists = False
            maybe_tr, _ = ct.send(exists)
            if maybe_tr == "tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr == "tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists = True

        if exists:
            code, msg = check_if_ok(status_path, default="ok")
            maybe_process(ctx, code, msg, primary_path, compare_path, just_basename, dest_path)

        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)
def run(enable_simulate=False, bucket_name=None, bucket_prefix=None, path_source=None, path_dest=None, delete_old=False, **_):
    """Prepare an S3 upload of a single file, optionally pruning old versions.

    Resolves the source file, connects to S3, gets/creates the bucket,
    derives root-name/version from the filename, optionally collects
    older keys to delete, reads the file contents, and prepares the S3
    key object for the upload.

    :param enable_simulate: log actions without performing the upload
    :param bucket_name: target S3 bucket
    :param bucket_prefix: key prefix inside the bucket
    :param path_source: local file to upload
    :param path_dest: key filename to use (defaults to the basename)
    :param delete_old: collect older versions of the file for deletion
    :raises Exception: on S3 connectivity/bucket/key or file read errors
    """
    code, path_source = resolve_path(path_source)
    if not code.startswith("ok"):
        logging.warning("Source file '%s' can't be accessed..." % path_source)

    try:
        conn = boto.connect_s3()
    # FIX: narrowed from bare 'except:'.
    except Exception:
        ## not much we can do
        ## but actually no remote calls are made
        ## at this point so it should be highly improbable
        raise Exception("Can't 'connect' to S3")

    try:
        bucket = conn.create_bucket(bucket_name)
        logging.info("Got bucket '%s'" % bucket_name)
    except Exception:
        raise Exception("Can't get bucket '%s'" % bucket_name)

    base_name = os.path.basename(path_source)
    logging.info("Basename of file to upload: %s" % base_name)

    root_name, version, _ext = split_path_version(base_name)
    if version is not None and len(version) > 0:
        # FIX: both lines logged 'version'; the first is meant to show
        # the root name of the file.
        logging.info("Basename of file: %s" % root_name)
        logging.info("Version of file: %s" % version)
    else:
        version = None

    if root_name is None:
        root_name = base_name

    if path_dest is None:
        logging.info("Will be using '%s' as filename in bucket" % base_name)
        path_dest = base_name

    key_names = None
    to_delete = None
    if delete_old:
        logging.info("Getting bucket keys")
        code, bkeys = get_all_keys(bucket, bucket_prefix)
        if not code.startswith("ok"):
            raise Exception("Can't get bucket keys...")
        logging.info("Got %s key(s) to filter for 'old' files" % len(bkeys))
        _key_names, to_delete = filter_keys(root_name, bkeys)
        logging.info("Older files found: %s" % to_delete)

    if enable_simulate:
        logging.info("! Begin simulation...")

    code, contents = file_contents(path_source)
    if not code.startswith("ok"):
        raise Exception("Can't read file '%s'" % path_source)
    logging.info("Got source file contents")

    try:
        upload_key = S3Key(bucket)
        upload_key_name = gen_key(bucket_prefix, path_dest)
        upload_key.key = upload_key_name
        logging.info("Prepared S3 key: %s" % upload_key.key)
    # FIX: 'except Exception,e' -> 'as e' (portable syntax).
    except Exception as e:
        raise Exception("S3 key generation: %s" % str(e))
def run(bucket_name=None, path_source=None, path_move=None, delete_source=False, polling_interval=60, extd=None, extf=None, **_):
    """Continuously upload directory trees under `path_source` to S3.

    Each cycle scans root directories (filtered by `extd`), and
    `process_dir` uploads matching files (filtered by `extf`), after
    which sources are either deleted or moved to `path_move`.

    :param bucket_name: target S3 bucket (created if missing, with retry)
    :param path_source: directory to scan
    :param path_move: where processed files go (exclusive with delete)
    :param delete_source: delete processed files instead of moving
    :param polling_interval: seconds between scans
    :raises Exception: on invalid options/paths or S3 connection failure
    :raises Warning: from inside the loop if the source path disappears
    """
    if not delete_source and path_move is None:
        raise Exception("Options 'delete source' or 'move path' is required")
    if delete_source and path_move is not None:
        raise Exception("Options 'delete source' and 'move path' are mutually exclusive")

    bucket_name = bucket_name.strip()
    path_source = path_source.strip()

    code, p_src = resolve_path(path_source)
    if not code.startswith("ok"):
        raise Exception("Invalid source path: %s" % path_source)
    # NOTE(review): mkdir_p result deliberately unchecked here (matches
    # original); a failure shows up as a Warning inside the loop.
    mkdir_p(p_src)

    if path_move is not None:
        code, path_move = resolve_path(path_move)
        if not code.startswith("ok"):
            raise Exception("Invalid move path: %s" % path_move)
        code, _ = mkdir_p(path_move)
        if not code.startswith("ok"):
            raise Exception("Can't create move path: %s" % path_move)

    try:
        conn = boto.connect_s3()
    # FIX: narrowed from bare 'except:'.
    except Exception:
        ## not much we can do
        ## but actually no remote calls are made
        ## at this point so it should be highly improbable
        raise Exception("Can't 'connect' to S3")

    ###################### BUCKET
    logging.info("Getting/creating bucket (unlimited retries with backoff)")

    def _get_create_bucket():
        return conn.create_bucket(bucket_name)

    bucket = retry(_get_create_bucket)
    logging.info("Got bucket: %s" % bucket_name)
    #############################

    logging.debug("Starting loop...")
    ppid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid: %s" % ppid)
    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break
        #################################################
        logging.debug("Start processing...")
        code, dirs = get_root_dirs(p_src)
        if not code.startswith("ok"):
            # NOTE(review): raising (Warning is an Exception subclass)
            # terminates the loop - preserved from the original.
            raise Warning("Source path disappeared: %s" % p_src)
        dirs = filter_dirs(extd, dirs)
        for _dir in dirs:
            process_dir(bucket, _dir, delete_source, extf, path_move)
        #####################################################
        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)
def run(enable_simulate=False, bucket_name=None, path_source=None, path_moveto=None, path_check=None, num_files=5, enable_delete=False, propagate_error=False, prefix=None, polling_interval=None, only_ext=None, filename_input_full=False, filename_input_regex=None, key_output_format=None, enable_progress_report=False, write_done=False, **_):
    """Continuously upload files found under `path_source` to S3.

    Walks the source tree up to `num_files` files per cycle, derives an
    S3 key for each (either via regex + output format or via prefix),
    uploads (or simulates), then deletes or moves the source file.
    Processing can be gated on `path_check` existing.

    :param enable_simulate: log what would happen; no S3 calls
    :param bucket_name: target S3 bucket (created if missing, with retry)
    :param path_moveto: destination dir for processed files (exclusive
        with `enable_delete`)
    :param filename_input_regex: regex applied to input filenames; must
        be paired with `key_output_format`
    :param key_output_format: format string producing the S3 key
    :raises Exception: on invalid option combinations or unusable paths
    """
    # -ifnr / -okf must be used together.
    if key_output_format is not None:
        if filename_input_regex is None:
            raise Exception("-ifnr and -okf options work in tandem")

    if filename_input_regex is not None:
        if key_output_format is None:
            raise Exception("Input filename regex specified but no output S3 key format specified")
        logging.info("Compiling input filename regex...")
        try:
            ireg = re.compile(filename_input_regex.strip("'"))
            ofmt = key_output_format.strip("'")
        except Exception:
            raise Exception("Can't compile input filename regex pattern")
    else:
        ireg = None
        ofmt = None

    bucket_name = bucket_name.strip()
    path_source = path_source.strip()

    # FIX: bare 'except:' narrowed to AttributeError - these guards only
    # exist to tolerate None (no .strip attribute).
    try:
        prefix = prefix.strip()
    except AttributeError:
        prefix = None

    try:
        path_moveto = path_moveto.strip()
    except AttributeError:
        path_moveto = None

    if path_check is not None:
        code, path_check = resolve_path(path_check)
        if not code.startswith("ok"):
            logging.warning("path_check '%s' might be in error..." % path_check)

    ### VALIDATE PARAMETERS
    if not enable_delete and path_moveto is None:
        raise Exception("either -d or -m must be used")
    if enable_delete and path_moveto is not None:
        raise Exception("-d can't be used with -m")

    code, p_src = resolve_path(path_source)
    if not code.startswith("ok"):
        raise Exception("Invalid source path: %s" % path_source)

    if path_moveto is not None:
        code, p_dst = resolve_path(path_moveto)
        if not code.startswith("ok"):
            raise Exception("Invalid moveto path: %s" % path_moveto)
    else:
        p_dst = None

    ### wait for 'source' path to be available
    logging.info("Waiting for source path to be accessible... CTRL-c to stop")
    while True:
        if os.path.isdir(p_src):
            break
        sleep(1)
    logging.info("* Source path accessible")

    if path_moveto is not None:
        logging.info("Creating 'moveto' directory if required")
        code, _ = mkdir_p(p_dst)
        if not code.startswith("ok"):
            raise Exception("Can't create 'moveto' directory: %s" % p_dst)
        logging.info("* Created moveto directory")

    if not enable_simulate:
        try:
            conn = boto.connect_s3()
        # FIX: narrowed from bare 'except:'.
        except Exception:
            ## not much we can do
            ## but actually no remote calls are made
            ## at this point so it should be highly improbable
            raise Exception("Can't 'connect' to S3")

    if not enable_simulate:
        ###################### BUCKET
        logging.info("Getting/creating bucket (unlimited retries with backoff)")

        def _get_create_bucket():
            return conn.create_bucket(bucket_name)

        bucket = retry(_get_create_bucket)
        logging.info("Got bucket")
        #############################

    if enable_simulate:
        logging.info("Begin simulation...")
    else:
        logging.debug("Starting loop...")

    ppid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid: %s" % ppid)
    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break
        #################################################
        # FIX: 'count' was only assigned inside the gated branch, so the
        # 'if count > 0' below raised NameError when the check path was
        # absent on the first cycle.
        count = 0
        _code, path_exists = safe_path_exists(path_check)
        if path_check is None or path_exists:
            try:
                logging.debug("Start processing...")
                gen = gen_walk(p_src, max_files=num_files, only_ext=only_ext)
                for src_filename in gen:
                    if enable_progress_report:
                        logging.info("Processing file: %s" % src_filename)
                    if write_done:
                        if is_done_file(src_filename):
                            continue
                    try:
                        s3key_name = gen_s3_key(ireg, ofmt, p_src, src_filename, prefix, filename_input_full)
                    # FIX: portable exception syntax.
                    except Exception as e:
                        raise Exception("Error generating S3 key... check your command line parameters... use the 'simulate' facility: %s" % e)
                    if enable_simulate:
                        simulate(src_filename, s3key_name, enable_delete, p_dst)
                    else:
                        k = S3Key(bucket)
                        k.key = s3key_name
                        was_uploaded = process_file(
                            enable_progress_report,
                            bucket_name,
                            prefix,
                            k,
                            src_filename,
                            p_dst,
                            enable_delete,
                            propagate_error,
                            write_done,
                        )
                        if was_uploaded:
                            count = count + 1
            except Exception as e:
                logging.error("Error processing files...(%s)" % str(e))
        else:
            # FIX: logging.info() was called with no message, raising
            # TypeError whenever the check path was absent.
            logging.info("Check path '%s' not present... skipping processing" % path_check)
        if count > 0:
            logging.info("Progress> uploaded %s files" % count)
        #####################################################
        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)