def __init__(self, account_id, application_key, bucket_id, enable_hashfiles, temp_folder, use_disk):
    account_info = InMemoryAccountInfo()
    self.api = B2Api(account_info)
    self.api.authorize_account('production', account_id, application_key)
    self.bucket_api = CachedBucket(self.api, bucket_id)

    self.logger = logging.getLogger("%s.%s" % (__name__, self.__class__.__name__))

    self.enable_hashfiles = enable_hashfiles
    self.temp_folder = temp_folder
    self.use_disk = use_disk

    if self.use_disk:
        if os.path.exists(self.temp_folder):
            self.logger.error("Temporary folder exists, exiting")
            exit(1)
        os.makedirs(self.temp_folder)
        self.B2File = B2FileDisk
    else:
        self.B2File = B2SequentialFileMemory

    self._directories = DirectoryStructure()
    self.local_directories = []

    self.open_files = defaultdict(self.B2File)

    self.fd = 0
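# A minimal usage sketch for the constructor above. The enclosing class name
# (`B2Fuse`) is an assumption, not taken from this source; only the
# constructor arguments mirror the code above.
fs = B2Fuse(
    account_id='YOUR_ACCOUNT_ID',
    application_key='YOUR_APPLICATION_KEY',
    bucket_id='YOUR_BUCKET_ID',
    enable_hashfiles=False,
    temp_folder='/tmp/b2fuse_temp',
    use_disk=False,  # keep open files in memory via B2SequentialFileMemory
)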
def setUp(self):
    self.bucket_name = 'my-bucket'
    self.simulator = RawSimulator()
    self.account_info = StubAccountInfo()
    self.api = B2Api(self.account_info, raw_api=self.simulator)
    self.api.authorize_account('production', 'my-account', 'good-app-key')
    self.bucket = self.api.create_bucket('my-bucket', 'allPublic')
def __init__(self, bucket, path, account_id, app_key, workers=10, compare_method='mtime'):
    super(B2Reciever, self).__init__()
    self.log = logging.getLogger("B2Reciever")
    self.bucket_name = bucket
    self.path = path.lstrip('/')
    self.account_id = account_id
    self.app_key = app_key
    self.api = B2Api(max_upload_workers=workers)
    self.api.authorize_account('production', self.account_id, self.app_key)
    self.bucket = self.api.get_bucket_by_name(self.bucket_name)
    self.db = None
    self._db_setup()
    # The receiver is responsible for determining whether a file needs to be uploaded or not.
    self.should_transfer = {
        "mtime": self._should_transfer_mtime,
        "size": self._should_transfer_size
    }[compare_method]
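# The dispatch table above expects two comparator methods that are not shown
# in this excerpt. A hedged sketch of what they might look like; the argument
# shapes (objects with mtime/size attributes) are assumptions, since the
# original bodies are not included here.
def _should_transfer_mtime(self, local_file, remote_file):
    # Upload when the remote copy is missing or older than the local one.
    return remote_file is None or local_file.mtime > remote_file.mtime

def _should_transfer_size(self, local_file, remote_file):
    # Upload when the remote copy is missing or differs in size.
    return remote_file is None or local_file.size != remote_file.size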
def __init__(self, account_id, application_key, bucket_id, db_file):
    try:
        account_info = InMemoryAccountInfo()
        self.api = B2Api(account_info)
        self.api.authorize_account("production", account_id, application_key)
    except B2ConnectionError as e:
        print(e)
        raise ConnectionError
    self.bucket_api = Bucket(self.api, bucket_id)
    self.file_info_store = FileInfoStore(db_file)
def setUp(self):
    self.bucket_name = 'my-bucket'
    self.simulator = RawSimulator()
    self.account_info = StubAccountInfo()
    self.api = B2Api(self.account_info, raw_api=self.simulator)
    (self.account_id, self.master_key) = self.simulator.create_account()
    self.api.authorize_account('production', self.account_id, self.master_key)
    self.api_url = self.account_info.get_api_url()
    self.account_auth_token = self.account_info.get_account_auth_token()
    self.bucket = self.api.create_bucket('my-bucket', 'allPublic')
    self.bucket_id = self.bucket.id_
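# A sketch of a test that could follow this setUp(); it uses only the public
# Bucket API (upload_bytes, ls), which the RawSimulator backs with an
# in-memory implementation. The test itself is illustrative, not part of the
# original suite.
def test_upload_and_list(self):
    self.bucket.upload_bytes(b'hello world', 'hello.txt')
    names = [info.file_name for info, _ in self.bucket.ls()]
    self.assertEqual(['hello.txt'], names)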
def __init__(self, uri):
    try:
        assert uri.startswith('b2://')
        self.bucket, self.prefix = uri[5:].split('/', 1)
        self.api = B2Api()
        self.api.authorize_account('production', self.B2_ACCOUNT_ID, self.B2_APPLICATION_KEY)
        self.c = self._get_b2_bucket()
    except Exception as e:
        logger.exception(e)
        raise CloseSpider('could not initialize B2')
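# `_get_b2_bucket` is called above but not defined in this excerpt. A minimal
# sketch, assuming it simply resolves the bucket name parsed from the URI via
# the standard B2Api lookup; the real helper may do more (e.g. error handling).
def _get_b2_bucket(self):
    return self.api.get_bucket_by_name(self.bucket)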
def main(api_key, project_id, spider_name, b2_account_id, b2_application_key, b2_path, delete):
    bucket_name, root = split_bucket_prefix(b2_path)
    bucket = None
    client = ScrapinghubClient(api_key)
    project = client.get_project(project_id)
    for name in spider_name:
        spider = project.spiders.get(name)
        job_list = spider.jobs.list(state='finished')
        keys = []
        for job in job_list:
            if 'items' in job and job['items'] > 0:
                keys.append(job['key'])
        if keys:
            if bucket is None:
                b2_api = B2Api()
                b2_api.authorize_account('production', b2_account_id, b2_application_key)
                bucket = b2_api.get_bucket_by_name(bucket_name)
            for key in keys:
                job = spider.jobs.get(key)
                if job:
                    out = io.BytesIO()
                    with gzip.GzipFile(fileobj=out, mode='w') as outfile:
                        for item in job.items.iter():
                            line = json.dumps(item) + '\n'
                            outfile.write(line.encode('utf8'))
                    content = out.getvalue()
                    file_name = os.path.join(root, name, key.replace('/', '-') + '.jl.gz')
                    upload_file(bucket, file_name, content)
                    if delete:
                        job.delete()
                        logging.warning('job {} deleted'.format(key))
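# `upload_file` and `split_bucket_prefix` are called above but not defined in
# this excerpt. A minimal sketch of `upload_file`, assuming it wraps the B2
# SDK's Bucket.upload_bytes; the original helper may differ.
def upload_file(bucket, file_name, content):
    # Upload the gzipped JSON-lines payload under the computed key.
    bucket.upload_bytes(content, file_name)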
def __init__(self, bucket_id):
    info = SqliteAccountInfo()
    self.api = B2Api(info, AuthInfoCache(info), raw_api=B2RawApi(B2Http()))
    self.bucket = self.api.get_bucket_by_name(bucket_id)
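# Usage sketch for the wrapper above. SqliteAccountInfo reads credentials
# persisted by an earlier `b2 authorize-account` (by default in
# ~/.b2_account_info), so no authorize_account() call is needed here. The
# class name `B2BucketWrapper` is an assumption; the excerpt does not show it.
wrapper = B2BucketWrapper('my-bucket')
for file_info, _ in wrapper.bucket.ls():
    print(file_info.file_name, file_info.size)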
def setUp(self):
    self.account_info = StubAccountInfo()
    self.cache = InMemoryCache()
    self.raw_api = RawSimulator()
    self.b2_api = B2Api(self.account_info, self.cache, self.raw_api)
def setUp(self):
    self.account_info = InMemoryAccountInfo()
    self.cache = DummyCache()
    self.raw_api = RawSimulator()
    self.api = B2Api(self.account_info, self.cache, self.raw_api)
    (self.account_id, self.master_key) = self.raw_api.create_account()
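# A sketch of a test that could follow this setUp(): authorize against the
# simulator with the generated credentials, then create a bucket to act on.
# Illustrative only; not part of the original suite.
def test_create_bucket(self):
    self.api.authorize_account('production', self.account_id, self.master_key)
    bucket = self.api.create_bucket('test-bucket', 'allPrivate')
    self.assertEqual('test-bucket', bucket.name)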
def list_target_files(config):
    import urllib.parse
    try:
        target = urllib.parse.urlparse(config["target"])
    except ValueError:
        return "invalid target"

    if target.scheme == "file":
        return [(fn, os.path.getsize(os.path.join(target.path, fn)))
                for fn in os.listdir(target.path)]

    elif target.scheme == "rsync":
        rsync_fn_size_re = re.compile(r'.* ([^ ]*) [^ ]* [^ ]* (.*)')
        rsync_target = '{host}:{path}'

        target_path = target.path
        if not target_path.endswith('/'):
            target_path = target_path + '/'
        if target_path.startswith('/'):
            target_path = target_path[1:]

        rsync_command = [
            'rsync',
            '-e',
            rsync_ssh_options(config["target_rsync_port"], direct=True),
            '--list-only',
            '-r',
            rsync_target.format(host=target.netloc, path=target_path)
        ]

        code, listing = shell('check_output', rsync_command, trap=True, capture_stderr=True)
        if code == 0:
            ret = []
            for l in listing.split('\n'):
                match = rsync_fn_size_re.match(l)
                if match:
                    ret.append((match.groups()[1], int(match.groups()[0].replace(',', ''))))
            return ret
        else:
            if 'Permission denied (publickey).' in listing:
                reason = "Invalid user, or check that you copied the SSH key correctly."
            elif 'No such file or directory' in listing:
                reason = "The provided path {} is invalid.".format(target_path)
            elif 'Network is unreachable' in listing:
                reason = "The IP address {} is unreachable.".format(target.hostname)
            elif 'Could not resolve hostname' in listing:
                reason = "The hostname {} cannot be resolved.".format(target.hostname)
            else:
                reason = "Unknown error. " \
                         "Please try running 'management/backup.py --verify' " \
                         "from the mailinabox sources to debug the issue."
            raise ValueError("Connection to rsync host failed: {}".format(reason))

    elif target.scheme == "s3":
        # Match the endpoint to a region.
        fix_boto()  # must call prior to importing boto
        import boto.s3
        from boto.exception import BotoServerError
        custom_region = False
        for region in boto.s3.regions():
            if region.endpoint == target.hostname:
                break
        else:
            # If the region is not found, this is a custom region.
            custom_region = True

        bucket = target.path[1:].split('/')[0]
        path = '/'.join(target.path[1:].split('/')[1:]) + '/'

        # Create a custom region with a custom endpoint.
        if custom_region:
            from boto.s3.connection import S3Connection
            region = boto.s3.S3RegionInfo(name=bucket, endpoint=target.hostname, connection_cls=S3Connection)

        # If no prefix is specified, set the path to '', otherwise boto won't list the files.
        if path == '/':
            path = ''

        if bucket == "":
            raise ValueError("Enter an S3 bucket name.")

        # Connect to the region & bucket.
        try:
            conn = region.connect(aws_access_key_id=config["target_user"],
                                  aws_secret_access_key=config["target_pass"])
            bucket = conn.get_bucket(bucket)
        except BotoServerError as e:
            if e.status == 403:
                raise ValueError("Invalid S3 access key or secret access key.")
            elif e.status == 404:
                raise ValueError("Invalid S3 bucket name.")
            elif e.status == 301:
                raise ValueError("Incorrect region for this bucket.")
            raise ValueError(e.reason)

        return [(key.name[len(path):], key.size) for key in bucket.list(prefix=path)]

    elif target.scheme == 'b2':
        InMemoryAccountInfo = None
        B2Api = None
        NonExistentBucket = None
        if get_os_code() == "Debian10":
            # WARNING: This is deprecated code using a legacy library.
            # We need it because Debian 10 ships with an old version of Duplicity.
            from b2.account_info import InMemoryAccountInfo
            from b2.api import B2Api
            from b2.exception import NonExistentBucket
        else:
            from b2sdk.v1 import InMemoryAccountInfo, B2Api
            from b2sdk.v1.exception import NonExistentBucket
        info = InMemoryAccountInfo()
        b2_api = B2Api(info)

        # Extract credentials and bucket name from the target.
        b2_application_keyid = target.netloc[:target.netloc.index(':')]
        b2_application_key = target.netloc[target.netloc.index(':') + 1:target.netloc.index('@')]
        b2_bucket = target.netloc[target.netloc.index('@') + 1:]

        try:
            b2_api.authorize_account("production", b2_application_keyid, b2_application_key)
            bucket = b2_api.get_bucket_by_name(b2_bucket)
        except NonExistentBucket:
            raise ValueError("B2 bucket does not exist. Please double-check your information!")
        return [(key.file_name, key.size) for key, _ in bucket.ls()]

    else:
        raise ValueError(config["target"])
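# Worked example of the netloc slicing in the 'b2' branch above. The target
# takes the form b2://applicationKeyId:applicationKey@bucket (the URL scheme
# Duplicity's B2 backend expects); the sample values are made up.
import urllib.parse
sample = urllib.parse.urlparse('b2://keyid123:secretKey456@my-backup-bucket')
netloc = sample.netloc
assert netloc[:netloc.index(':')] == 'keyid123'                           # b2_application_keyid
assert netloc[netloc.index(':') + 1:netloc.index('@')] == 'secretKey456'  # b2_application_key
assert netloc[netloc.index('@') + 1:] == 'my-backup-bucket'               # b2_bucket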