import argparse
import fileinput

import boto.s3.connection
from boto.s3.connection import S3Connection
from boto.utils import parse_ts

import campanile


def main():
    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--bucket', required=True, help='Bucket')
    parser.add_argument('--endpoint',
            default=boto.s3.connection.NoHostProvided, help='S3 endpoint')
    parser.add_argument('--profile', help='Boto profile used for connection')
    args = parser.parse_args()

    ## S3 Connection
    bucket = S3Connection(suppress_consec_slashes=False, host=args.endpoint,
            is_secure=True, profile_name=args.profile).get_bucket(args.bucket)

    ## Hadoop Counters
    totalsize = 0

    ## In a Stream?
    start_index = campanile.stream_index()

    ## Process input
    for line in fileinput.input("-"):
        if line.startswith('#'):
            continue
        delim, prefix = line.rstrip('\n').split('\t')[start_index].split(',')
        for key in bucket.list(prefix=prefix, delimiter=delim):
            if key.__class__.__name__ == "Prefix":
                continue
            ## Don't include Glacier objects
            if key.storage_class == 'GLACIER':
                continue
            print "%s\t%s\t%s\t%s" % (key.name.encode('utf-8'),
                    key.etag.replace("\"", ""), key.size,
                    parse_ts(key.last_modified))
            ## Log stats
            campanile.counter(args.bucket, "Bytes", key.size)
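## Example (hypothetical): the mapper above reads one "<delimiter>,<prefix>"
## field per input line and emits one tab-separated record per S3 object.
## A minimal sketch of both record layouts; the prefix, key name, ETag, size,
## and timestamp below are made-up sample values, not project fixtures.
def example_list_records():
    sample_input = "/,logs/2015/"            # delimiter "/" and prefix "logs/2015/"
    delim, prefix = sample_input.split(',')
    sample_output = "%s\t%s\t%s\t%s" % (
            "logs/2015/01/access.log",                  # key.name
            "9e107d9d372bb6826bd81d3542a419d6",         # key.etag, quotes stripped
            1048576,                                    # key.size in bytes
            "2015-01-02 03:04:05")                      # parse_ts(key.last_modified)
    return delim, prefix, sample_output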
import argparse
import fileinput
import random
import tempfile
import ConfigParser

import boto.s3.connection
import boto.s3.multipart
from boto.s3.connection import S3Connection

import campanile


def main():
    ## Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--src-bucket', required=True, dest='src',
            help='Source S3 bucket')
    parser.add_argument('--dst-bucket', required=True, dest='dst',
            help='Destination S3 bucket')
    parser.add_argument('--src-endpoint',
            default=boto.s3.connection.NoHostProvided,
            help='S3 source endpoint')
    parser.add_argument('--dst-endpoint',
            default=boto.s3.connection.NoHostProvided,
            help='S3 destination endpoint')
    parser.add_argument('--src-profile',
            help='Boto profile used for source connection')
    parser.add_argument('--dst-profile',
            help='Boto profile used for destination connection')
    parser.add_argument('--config', '-c', default="./campanile.cfg",
            help='Path to config file')
    args = parser.parse_args()

    ## Config Object
    cfgfiles = campanile.cfg_file_locations()
    cfgfiles.insert(0, args.config)
    c = ConfigParser.SafeConfigParser({'ephemeral': '/tmp'})
    c.read(cfgfiles)

    ## S3 Bucket Connections
    src_bucket = S3Connection(suppress_consec_slashes=False,
            host=args.src_endpoint, is_secure=True,
            profile_name=args.src_profile).get_bucket(args.src, validate=False)
    dst_bucket = S3Connection(suppress_consec_slashes=False,
            host=args.dst_endpoint, is_secure=True,
            profile_name=args.dst_profile).get_bucket(args.dst, validate=False)

    ## Reporting Counters
    files = 0
    movedbytes = 0

    ## Select random tmpdir to distribute load across disks
    tmpdir = random.choice(c.get('DEFAULT', "ephemeral").split(','))

    start_index = campanile.stream_index()

    for line in fileinput.input("-"):
        name, etag, size, mtime, mid, part, partcount, startbyte, stopbyte = \
                line.rstrip('\n').split('\t')[start_index:]

        srckey = src_bucket.get_key(name, validate=False)
        dstkey = dst_bucket.get_key(name, validate=False)

        if mid == campanile.NULL:
            headers = {}
            report_name = name
            expected_size = int(size)
        else:
            headers = {'Range': "bytes=%s-%s" % (startbyte, stopbyte)}
            report_name = "%s-%s" % (name, 'part')
            expected_size = int(stopbyte) - int(startbyte) + 1

        with tempfile.SpooledTemporaryFile(
                max_size=c.getint('DEFAULT', 'maxtmpsize'), dir=tmpdir) as fp:
            ## Download
            p = campanile.FileProgress(name, verbose=1)
            srckey.get_contents_to_file(fp, headers=headers, cb=p.progress)
            if fp.tell() != expected_size:
                raise Exception("Something bad happened for %s. "
                        "Expecting %s, but got %s" %
                        (report_name, expected_size, fp.tell()))

            campanile.counter(args.src, "OutputBytes", size)

            fp.flush()
            fp.seek(0)

            if mid == campanile.NULL:
                dstkey.cache_control = srckey.cache_control
                dstkey.content_type = srckey.content_type
                dstkey.content_encoding = srckey.content_encoding
                dstkey.content_disposition = srckey.content_disposition
                dstkey.content_language = srckey.content_language
                dstkey.metadata = srckey.metadata
                dstkey.md5 = srckey.md5
                report_name = name
            else:
                mp = boto.s3.multipart.MultiPartUpload(bucket=dst_bucket)
                mp.id = mid
                mp.key_name = name
                report_name = "%s-%s" % (name, part)

            ## Upload
            p = campanile.FileProgress(report_name, verbose=1)
            if mid == campanile.NULL:
                dstkey.set_contents_from_file(fp,
                        encrypt_key=srckey.encrypted, cb=p.progress)
                newetag = dstkey.etag.replace("\"", "")
            else:
                mpart = mp.upload_part_from_file(fp, part_num=int(part),
                        cb=p.progress)
                newetag = mpart.etag.replace("\"", "")

            if newetag != srckey.md5:
                ## Add alert
                raise Exception("Something bad happened for %s. "
                        "Expecting %s md5, but got %s" %
                        (report_name, srckey.md5, newetag))

        if mid != campanile.NULL:
            print "%s\t%s\t%s\t%s\t%s\t%s\t%s" % \
                    (name, etag, mid, newetag, part, startbyte, stopbyte)

        campanile.counter(args.dst, "InputBytes", expected_size)
        campanile.status("%s/%s:OK" % (args.dst, report_name))
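## Example (hypothetical): each input record for the copy step carries nine
## tab-separated fields. A whole-object copy uses campanile.NULL placeholders
## for the multipart fields, while a part copy carries the upload id and byte
## range. The key names, ETags, sizes, and the literal "NULL" placeholder
## string below are assumed sample values for illustration only.
def example_copy_records():
    whole_object = "\t".join([
            "logs/2015/01/access.log",                  # name
            "9e107d9d372bb6826bd81d3542a419d6",         # etag
            "1048576",                                  # size
            "2015-01-02T03:04:05.000Z",                 # mtime
            "NULL", "NULL", "NULL", "NULL", "NULL"])    # mid/part/partcount/startbyte/stopbyte
    multipart_part = "\t".join([
            "backups/db.dump",                          # name
            "0c3b4fabcdef-2048",                        # etag of the source object
            "10737418240",                              # size of the whole object
            "2015-01-02T03:04:05.000Z",                 # mtime
            "2~exampleUploadId",                        # mid: destination multipart upload id
            "1",                                        # part number
            "2048",                                     # part count
            "0",                                        # startbyte
            "5242879"])                                 # stopbyte (inclusive)
    return whole_object, multipart_part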