示例#1
0
def run( path=None
        ,polling_interval=None
        ,topic=None
        ,always=None
        ,**_
        ):
    """
    Poll `path` for sub-directories forever, echoing stdin to stdout between polls.

    Each cycle lists the sub-directories of `path` through getsubdirs(),
    reports the resulting status code to the transition manager and, when
    the code starts with "ok", hands the listing to process().  The loop
    ends when the parent pid differs from the one seen at start-up.

    :param path: directory to watch; resolved through resolve_path()
    :param polling_interval: seconds between two listings.  With None the
        function blocks until stdin has activity and then polls again.
    :param topic: stored under "topic" in the context given to
        transition_manager() and process()
    :param always: stored under "always" in the same context
    :raises Exception: when `path` can't be resolved
    """

    code, path=resolve_path(path)
    if not code.startswith("ok"):
        raise Exception("Can't resolve path...: %s" % path)

    def loginfo(path, state, *_):
        logging.info("Path state '%s': %s" % (path, state))

    ctx={"topic": topic, "path": path, "always":always}
    ## NOTE(review): "ch" is presumably the callback fired by the transition
    ## manager when the path state changes -- confirm against transition_manager
    ctx["_path"]={
                  "previous": "ok"
                  ,"ch": partial(loginfo, path)
                  }
    tm=transition_manager(ctx)

    ppid=os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid : %s" % ppid)
    logging.info("Starting loop...")
    while True:
        if os.getppid()!=ppid:
            logging.warning("Parent terminated... exiting")
            break

        code, maybe_subdirs=getsubdirs(path)
        tm.send(("_path", code))
        if code.startswith("ok"):
            process(ctx, maybe_subdirs)

        ## Wait out the polling interval while relaying stdin to stdout
        start_time=time.time()
        while True:
            ## Only wait for what is left of the interval: passing the full
            ## interval on every pass could overshoot it by almost 2x
            if polling_interval is None:
                remaining=None
            else:
                remaining=max(0, polling_interval - (time.time() - start_time))

            ir, _w, _e=select.select([sys.stdin], [], [], remaining)
            if ir:
                iline=sys.stdin.readline()
                if iline:
                    sys.stdout.write(iline)
                else:
                    ## EOF: select() would now report stdin readable on every
                    ## call, turning this loop into a busy spin.  Sleep out
                    ## the rest of the interval instead.
                    if polling_interval is not None:
                        left=polling_interval - (time.time() - start_time)
                        if left > 0:
                            time.sleep(left)
                    break

            ## No interval configured: one stdin event per poll cycle
            if polling_interval is None:
                break

            if time.time() - start_time >= polling_interval:
                break
示例#2
0
def run(path_source=None
        ,polling_interval=None
        ,ext_include=None
        ,ext_exclude=None
        ,batch_size=None
        ,**_
        ):
    """
    Periodically list the root files of `path_source` and output them in batches.

    The listing is filtered by extension through filter_files_by_ext() using
    either the include list or the exclude list; the surviving entries are
    cut into groups by batch() and each group is passed to output().
    The loop stops once the parent pid differs from the one seen at start-up.

    :param path_source: directory to list; resolved through resolve_path()
    :param polling_interval: seconds to sleep between two listings
    :param ext_include: extensions to keep (exclusive with `ext_exclude`)
    :param ext_exclude: extensions to drop (exclusive with `ext_include`)
    :param batch_size: size argument forwarded to batch()
    :raises Exception: when both extension lists are given or when the
        source path can't be resolved
    """
    both_given = not (ext_include is None or ext_exclude is None)
    if both_given:
        raise Exception("'ee' and 'ei' options are mutually exclusive")

    ## An empty/None include list falls back to the 'exclude' criteria
    if ext_include:
        criteria = "include"
    else:
        criteria = "exclude"
    extension_list = ext_include or ext_exclude

    status, path_source = resolve_path(path_source)
    if not status.startswith("ok"):
        raise Exception("can't resolve path '%s'" % path_source)

    parent_pid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid : %s" % parent_pid)
    logging.info("Starting loop...")

    ## Keep going for as long as we are still attached to the original parent
    while os.getppid() == parent_pid:
        listing = get_root_files(path_source)
        selected = filter_files_by_ext(criteria, extension_list, (listing[0], listing[1]))

        if selected:
            for bunch in batch(selected, batch_size):
                output(path_source, bunch)

        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)

    logging.warning("Parent terminated... exiting")
示例#3
0
def run(source_path=None, move_path=None, check_path=None, 
        batch_size=5, 
        polling_interval=None, enable_delete=False
        ,**_):
    """
    Repeatedly hand the root files of `source_path` to process(), a batch at a time.

    Every cycle takes up to `batch_size` files that have not been put on the
    skip list, verifies each can be written to (can_write) and calls
    process(src_file, dst_file, enable_delete).  Files that fail either step
    are remembered and never retried for the lifetime of the process.
    The loop stops once the parent pid differs from the one seen at start-up.

    :param source_path: directory holding the files; resolved via resolve_path()
    :param move_path: optional directory; when given, it is created if needed
        and each file's destination is `move_path`/<basename>
    :param check_path: optional path that must exist for a cycle to do work
    :param batch_size: maximum number of files handled per cycle
    :param polling_interval: seconds to sleep between cycles
    :param enable_delete: forwarded to process(); exclusive with `move_path`
    :raises Exception: on conflicting options, unresolvable paths, or a
        'move' directory that can't be created or written to
    """

    if check_path is not None:
        ct=check_transition()

    if enable_delete and move_path is not None:
        raise Exception("Options '-mp' and '-d' are mutually exclusive")

    code, rp=resolve_path(source_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve source path '%s'" % source_path)
    source_path=rp

    if move_path is not None:
        code, rp=resolve_path(move_path)
        if not code.startswith("ok"):
            raise Exception("can't resolve 'move_path' '%s'" % move_path)
        move_path=rp

        logging.info("Creating (if necessary) 'move' path: %s" % move_path)
        code, msg=mkdir_p(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't create move path '%s': %s" % (move_path, str(msg)))

        logging.info("Checking if 'move' directory is writable")
        code, msg=can_write(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't write to 'move' directory")

    ## Files that failed once; a set keeps the membership test cheap
    to_skip=set()
    logging.info("Process pid: %s" % os.getpid())
    ppid=os.getppid()
    logging.info("Parent pid : %s" % ppid)
    logging.info("Starting loop...")
    while True:
        if os.getppid()!=ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            try:
                exists=os.path.exists(check_path)
            except Exception:
                exists=False

            ## Only log when the check result changes ("tr" == transition)
            maybe_tr, _=ct.send(exists)
            if maybe_tr=="tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr=="tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists=True

        if exists:
            code, files=get_root_files(source_path)
            if not code.startswith("ok"):
                logging.error("Can't get root files from %s" % source_path)
            else:
                ###############################################################
                ## Filter the skip list out *before* cutting the batch:
                ## otherwise `batch_size` bad files at the head of the listing
                ## would fill every batch and starve all the other files.
                files=[f for f in files if f not in to_skip][:batch_size]
                src_file=None
                try:
                    for src_file in files:

                        code, _=can_write(src_file)
                        if not code.startswith("ok"):
                            to_skip.add(src_file)
                            logging.error("Would not be able to move/delete source file '%s'... skipping streaming" % src_file)
                            continue

                        dst_file=None
                        if move_path is not None:
                            bn=os.path.basename(src_file)
                            dst_file=os.path.join(move_path, bn)

                        code, maybe_error=process(src_file, dst_file, enable_delete)
                        if not code.startswith("ok"):
                            to_skip.add(src_file)
                            logging.warning("Problem processing file '%s': %s" % (src_file, maybe_error))
                except BrokenPipe:
                    raise
                except KeyboardInterrupt:
                    raise
                except Exception as e:
                    ## Best effort: abandon this batch, retry on the next cycle
                    logging.error("processing file '%s': %s" % (src_file, str(e)))
                ###############################################################

        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)
示例#4
0
def run(primary_path=None, compare_path=None, 
        dest_path=None,
        status_filename=None, check_path=None
        ,just_basename=None
        ,topic_name=None
        ,exts=None
        ,wait_status=None, polling_interval=None
        ,just_zppp=None, just_ppzp=None, just_com=None
        ,**_):
    """
    Periodically run maybe_process() over a primary and a compare directory.

    After resolving (and creating if necessary) the involved directories, a
    context dictionary is assembled and the function loops: when the optional
    `check_path` exists, the status is read with check_if_ok() and passed,
    with the paths, to maybe_process().  The loop stops once the parent pid
    differs from the one seen at start-up.

    :param primary_path: primary directory ("pp" in the context)
    :param compare_path: compare directory ("zp" in the context)
    :param dest_path: optional destination directory, created if necessary
    :param status_filename: file name joined to `primary_path` to build the
        status path; only used when `wait_status` is set
    :param check_path: optional path that must exist for a cycle to do work
    :param just_basename: forwarded to maybe_process()
    :param topic_name: stored under "topic_name" in the context
    :param exts: stored under "exts" in the context
    :param wait_status: enables the status file
    :param polling_interval: seconds to sleep between cycles
    :param just_zppp, just_ppzp, just_com: stored in the context; their
        logical OR is stored under "just_list"
    :raises Exception: when a path can't be resolved or the destination
        directory can't be created
    """

    if check_path is not None:
        ct=check_transition()

    if dest_path:
        code, dest_path=resolve_path(dest_path)
        if not code.startswith("ok"):
            raise Exception("can't resolve destination path '%s'" % dest_path)

        logging.info("Creating (if necessary) destination path: %s" % dest_path)
        code, msg=mkdir_p(dest_path)
        ## Same success test as every other status code in this function
        if not code.startswith("ok"):
            raise Exception("Can't create path: %s" % dest_path)

    code, primary_path=resolve_path(primary_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve primary path '%s'" % primary_path)

    logging.info("Creating (if necessary) primary path: %s" % primary_path)
    code, msg=mkdir_p(primary_path)
    if not code.startswith("ok"):
        ## Still best effort (no raise), but no longer silent
        logging.warning("Can't create primary path '%s': %s" % (primary_path, str(msg)))

    code, compare_path=resolve_path(compare_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve compare path '%s'" % compare_path)

    logging.info("Creating (if necessary) compare path: %s" % compare_path)
    code, msg=mkdir_p(compare_path)
    if not code.startswith("ok"):
        logging.warning("Can't create compare path '%s': %s" % (compare_path, str(msg)))

    if wait_status:
        status_path=os.path.join(primary_path, status_filename)
        logging.info("Using status file path: %s" % status_path)
    else: 
        status_path=None

    ### context for logging etc.
    ctx={
          "just_zppp": just_zppp
         ,"just_ppzp": just_ppzp
         ,"just_com":  just_com
         ,"just_list": just_zppp or just_ppzp or just_com

         ,"pp": primary_path
         ,"zp": compare_path
         ,"sp": status_path

         ,"pp_log" :{"up":    partial(ilog, primary_path)
                     ,"down":  partial(wlog, primary_path)
                     }
         ,"zp_log" :{"up":    partial(ilog, compare_path)
                     ,"down":  partial(wlog, compare_path)
                     }
         ,"topic_name": topic_name
         ,"exts": exts
         }

    ctx["tm"]=transition_manager(ctx)

    ppid=os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent  pid: %s" % ppid)
    logging.info("Starting loop...")
    while True:
        if os.getppid()!=ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            try:
                exists=os.path.exists(check_path)
            except Exception:
                exists=False

            ## Only log when the check result changes ("tr" == transition)
            maybe_tr, _=ct.send(exists)
            if maybe_tr=="tr" and exists:
                logging.info("Check path: passed")
            if maybe_tr=="tr" and not exists:
                logging.info("Check path: failed - skipping")
        else:
            ## fake 'exists'
            exists=True

        if exists:
            code, msg=check_if_ok(status_path, default="ok")
            maybe_process(ctx, code, msg, primary_path, compare_path, just_basename, dest_path)

        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)
示例#5
0
def run(enable_simulate=False, 
        bucket_name=None, bucket_prefix=None, 
        path_source=None, path_dest=None,
        delete_old=False,
        **_):
    """
    Prepare the upload of a single (optionally versioned) file to an S3 bucket.

    The visible steps are: resolve the source path, connect to S3 and get the
    bucket, split the file's base name into root name / version, optionally
    list the bucket keys to find older files, read the source file and build
    the S3 key object for the upload.

    :param enable_simulate: when set, a "Begin simulation" line is logged
    :param bucket_name: name passed to conn.create_bucket()
    :param bucket_prefix: prefix used when listing keys and building the key name
    :param path_source: file to upload; resolved through resolve_path()
    :param path_dest: name to use inside the bucket; defaults to the base
        name of the source file
    :param delete_old: when set, bucket keys are listed and filtered with
        filter_keys() to find older files
    :raises Exception: when S3 can't be reached, the bucket or its keys can't
        be obtained, the source file can't be read, or key generation fails
    """

    code, path_source=resolve_path(path_source)
    if not code.startswith("ok"):
        logging.warning("Source file '%s' can't be accessed..." % path_source)

    try:
        conn = boto.connect_s3()
    except Exception:
        ## not much we can do
        ## but actually no remote calls are made
        ## at this point so it should be highly improbable
        raise Exception("Can't 'connect' to S3")

    try:
        bucket=conn.create_bucket(bucket_name)
        logging.info("Got bucket '%s'" % bucket_name)
    except Exception:
        raise Exception("Can't get bucket '%s'" % bucket_name)

    base_name=os.path.basename(path_source)
    logging.info("Basename of file to upload: %s" % base_name)

    root_name,version,_ext=split_path_version(base_name)
    if version is not None and len(version)>0:
        ## Report the root name here (the version used to be printed twice)
        logging.info("Basename of file: %s" % root_name)
        logging.info("Version of file:  %s" % version)
    else: 
        version=None

    if root_name is None:
        root_name=base_name

    if path_dest is None:
        logging.info("Will be using '%s' as filename in bucket" % base_name)
        path_dest=base_name

    key_names=None
    to_delete=None
    if delete_old:
        logging.info("Getting bucket keys")
        code, bkeys=get_all_keys(bucket, bucket_prefix)
        if not code.startswith("ok"):
            raise Exception("Can't get bucket keys...")

        logging.info("Got %s key(s) to filter for 'old' files" % len(bkeys))

        _key_names, to_delete=filter_keys(root_name, bkeys)

        logging.info("Older files found: %s" % to_delete)

    if enable_simulate:
        logging.info("! Begin simulation...")

    code, contents=file_contents(path_source)
    if not code.startswith("ok"):
        raise Exception("Can't read file '%s'" % path_source)

    logging.info("Got source file contents")

    try:
        upload_key=S3Key(bucket)
        upload_key_name=gen_key(bucket_prefix, path_dest)
        upload_key.key=upload_key_name

        logging.info("Prepared S3 key: %s" % upload_key.key)
    except Exception as e:
        raise Exception("S3 key generation: %s" % str(e))
示例#6
0
def run(bucket_name=None, 
        path_source=None, 
        path_move=None,
        delete_source=False,
        polling_interval=60,
        extd=None,
        extf=None
        ,**_):
    """
    Periodically pass the root directories of `path_source` to process_dir().

    After validating the options, the source directory (and the optional
    'move' directory) are resolved and created, an S3 connection and bucket
    are obtained, and the function loops: list the root directories, filter
    them with filter_dirs() and call process_dir() on each.  The loop stops
    once the parent pid differs from the one seen at start-up.

    :param bucket_name: bucket name (whitespace stripped)
    :param path_source: source directory (whitespace stripped, then resolved)
    :param path_move: optional directory forwarded to process_dir()
    :param delete_source: forwarded to process_dir(); exactly one of
        `delete_source` / `path_move` must be given
    :param polling_interval: seconds to sleep between cycles
    :param extd: filter argument forwarded to filter_dirs()
    :param extf: filter argument forwarded to process_dir()
    :raises Exception: on invalid option combinations, invalid paths, or
        when the S3 connection can't be set up
    :raises Warning: when the source directory can no longer be listed
    """
    move_requested = path_move is not None

    ## Exactly one of 'delete source' / 'move path' has to be selected
    if not (delete_source or move_requested):
        raise Exception("Options 'delete source' or 'move path' is required")

    if delete_source and move_requested:
        raise Exception("Options 'delete source' and 'move path' are mutually exclusive")

    bucket_name = bucket_name.strip()
    path_source = path_source.strip()

    status, p_src = resolve_path(path_source)
    if not status.startswith("ok"):
        raise Exception("Invalid source path: %s" % path_source)

    mkdir_p(p_src)

    if move_requested:
        status, path_move = resolve_path(path_move)
        if not status.startswith("ok"):
            raise Exception("Invalid move path: %s" % path_move)

        status, _ignored = mkdir_p(path_move)
        if not status.startswith("ok"):
            raise Exception("Can't create move path: %s" % path_move)

    try:
        conn = boto.connect_s3()
    except:
        ## no remote call happens at this stage, so a failure here
        ## is unexpected and there is nothing sensible left to do
        raise Exception("Can't 'connect' to S3")

    ## ---- bucket
    logging.info("Getting/creating bucket (unlimited retries with backoff)")

    def _get_create_bucket():
        return conn.create_bucket(bucket_name)

    bucket = retry(_get_create_bucket)
    logging.info("Got bucket: %s" % bucket_name)

    logging.debug("Starting loop...")

    parent_pid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid:  %s" % parent_pid)

    ## Keep going for as long as we are still attached to the original parent
    while os.getppid() == parent_pid:
        logging.debug("Start processing...")

        status, found_dirs = get_root_dirs(p_src)
        if not status.startswith("ok"):
            raise Warning("Source path disappeared: %s" % p_src)

        for _dir in filter_dirs(extd, found_dirs):
            process_dir(bucket, _dir, delete_source, extf, path_move)

        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)

    logging.warning("Parent terminated... exiting")
示例#7
0
def run(
    enable_simulate=False,
    bucket_name=None,
    path_source=None,
    path_moveto=None,
    path_check=None,
    num_files=5,
    enable_delete=False,
    propagate_error=False,
    prefix=None,
    polling_interval=None,
    only_ext=None,
    filename_input_full=False,
    filename_input_regex=None,
    key_output_format=None,
    enable_progress_report=False,
    write_done=False,
    **_
):
    """
    Periodically walk `path_source` and upload (or simulate uploading) its files to S3.

    Each cycle walks the source directory with gen_walk(), derives an S3 key
    name for every file with gen_s3_key() and either calls simulate() or
    process_file().  A cycle is skipped while the optional `path_check` does
    not exist.  The loop stops once the parent pid differs from the one seen
    at start-up.

    :param enable_simulate: call simulate() instead of touching S3
    :param bucket_name: bucket name (whitespace stripped)
    :param path_source: directory to walk (whitespace stripped, then resolved)
    :param path_moveto: optional directory forwarded to process_file()/simulate()
    :param path_check: optional path that must exist for a cycle to do work
    :param num_files: `max_files` argument of gen_walk()
    :param enable_delete: forwarded to process_file()/simulate(); exactly one
        of `enable_delete` / `path_moveto` must be given
    :param propagate_error: forwarded to process_file()
    :param prefix: key prefix (whitespace stripped); forwarded to gen_s3_key()
    :param polling_interval: seconds to sleep between cycles
    :param only_ext: `only_ext` argument of gen_walk()
    :param filename_input_full: forwarded to gen_s3_key()
    :param filename_input_regex: regex applied to input file names; must be
        given together with `key_output_format`
    :param key_output_format: output key format; must be given together with
        `filename_input_regex`
    :param enable_progress_report: log every file as it is processed
    :param write_done: skip files for which is_done_file() is true; also
        forwarded to process_file()
    :raises Exception: on invalid option combinations, invalid paths, a regex
        that doesn't compile, or when the S3 connection can't be set up
    """

    if key_output_format is not None:
        if filename_input_regex is None:
            raise Exception("-ifnr and -okf options work in tandem")

    if filename_input_regex is not None:

        if key_output_format is None:
            raise Exception("Input filename regex specified but no output S3 key format specified")

        logging.info("Compiling input filename regex...")
        try:
            ireg = re.compile(filename_input_regex.strip("'"))
            ofmt = key_output_format.strip("'")
        except Exception:
            raise Exception("Can't compile input filename regex pattern")
    else:
        ireg = None
        ofmt = None

    bucket_name = bucket_name.strip()
    path_source = path_source.strip()

    ## `prefix` and `path_moveto` are optional: anything without .strip()
    ## (typically None) is treated as "not provided"
    try:
        prefix = prefix.strip()
    except AttributeError:
        prefix = None

    try:
        path_moveto = path_moveto.strip()
    except AttributeError:
        path_moveto = None

    if path_check is not None:
        code, path_check = resolve_path(path_check)
        if not code.startswith("ok"):
            logging.warning("path_check '%s' might be in error..." % path_check)

    ### VALIDATE PARAMETERS
    if not enable_delete and path_moveto is None:
        raise Exception("either -d or -m must be used")

    if enable_delete and path_moveto is not None:
        raise Exception("-d can't be used with -m")

    code, p_src = resolve_path(path_source)
    if not code.startswith("ok"):
        raise Exception("Invalid source path: %s" % path_source)

    if path_moveto is not None:
        code, p_dst = resolve_path(path_moveto)
        if not code.startswith("ok"):
            raise Exception("Invalid moveto path: %s" % path_moveto)
    else:
        p_dst = None

    ### wait for 'source' path to be available
    logging.info("Waiting for source path to be accessible... CTRL-c to stop")
    while True:
        if os.path.isdir(p_src):
            break
        sleep(1)
    logging.info("* Source path accessible")

    if path_moveto is not None:
        logging.info("Creating 'moveto' directory if required")
        code, _ = mkdir_p(p_dst)
        if not code.startswith("ok"):
            raise Exception("Can't create 'moveto' directory: %s" % p_dst)
        logging.info("* Created moveto directory")

    if not enable_simulate:
        try:
            conn = boto.connect_s3()
        except Exception:
            ## not much we can do
            ## but actually no remote calls are made
            ## at this point so it should be highly improbable
            raise Exception("Can't 'connect' to S3")

        ###################### BUCKET
        logging.info("Getting/creating bucket (unlimited retries with backoff)")

        def _get_create_bucket():
            return conn.create_bucket(bucket_name)

        bucket = retry(_get_create_bucket)
        logging.info("Got bucket")
        #############################

    if enable_simulate:
        logging.info("Begin simulation...")
    else:
        logging.debug("Starting loop...")

    ppid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid:  %s" % ppid)
    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break
        #################################################

        ## Reset on every cycle: the progress report below must neither hit
        ## an unbound name (cycle skipped) nor reuse a previous cycle's total
        count = 0

        _code, path_exists = safe_path_exists(path_check)

        if path_check is None or path_exists:
            try:
                logging.debug("Start processing...")
                gen = gen_walk(p_src, max_files=num_files, only_ext=only_ext)

                for src_filename in gen:

                    if enable_progress_report:
                        logging.info("Processing file: %s" % src_filename)

                    if write_done:
                        if is_done_file(src_filename):
                            continue

                    try:
                        s3key_name = gen_s3_key(ireg, ofmt, p_src, src_filename, prefix, filename_input_full)
                    except Exception as e:
                        raise Exception(
                            "Error generating S3 key... check your command line parameters... use the 'simulate' facility: %s"
                            % e
                        )

                    if enable_simulate:
                        simulate(src_filename, s3key_name, enable_delete, p_dst)
                    else:
                        k = S3Key(bucket)
                        k.key = s3key_name
                        was_uploaded = process_file(
                            enable_progress_report,
                            bucket_name,
                            prefix,
                            k,
                            src_filename,
                            p_dst,
                            enable_delete,
                            propagate_error,
                            write_done,
                        )
                        if was_uploaded:
                            count = count + 1

            except Exception as e:
                ## Best effort: abandon this cycle, retry on the next one
                logging.error("Error processing files...(%s)" % str(e))
        else:
            ## logging.info() without a message raises TypeError
            logging.info("Check path '%s' not accessible - skipping" % path_check)

        if count > 0:
            logging.info("Progress> uploaded %s files" % count)

        #####################################################
        logging.debug("...sleeping for %s seconds" % polling_interval)
        sleep(polling_interval)