def read(self, key_name):
    ''' read a key from an S3 bucket, sending the contents to standard output '''
    k = Key(self.bucket)
    k.key = key_name
    print(k.get_contents_as_string())
def test_transport_addr_policy(self):
    bucket = self.conn.create_bucket(self.bucket_name)
    key_name = str(uuid.uuid4())
    key = Key(bucket, key_name)
    key.set_contents_from_string(self.data)

    ## anyone may GET this object
    policy = '''
{"Version":"2008-10-17","Statement":[{"Sid":"Stmtaaa0","Effect":"Allow","Principal":"*","Action":["s3:GetObject"],"Resource":"arn:aws:s3:::%s/*","Condition":{"Bool":{"aws:SecureTransport":false}}}]}
''' % bucket.name
    self.assertTrue(bucket.set_policy(policy, headers={'content-type': 'application/json'}))
    key.get_contents_as_string()

    ## policy accepts anyone who comes with http
    conn = httplib.HTTPConnection(self.host, self.port)
    headers = {"Host": "%s.s3.amazonaws.com" % bucket.name}
    conn.request('GET', "/%s" % key_name, None, headers)
    response = conn.getresponse()
    self.assertEqual(response.status, 200)
    self.assertEqual(response.read(), key.get_contents_as_string())

    ## anyone without https may not do any operation
    policy = '''
{"Version":"2008-10-17","Statement":[{"Sid":"Stmtaaa0","Effect":"Deny","Principal":"*","Action":"*","Resource":"arn:aws:s3:::%s/*","Condition":{"Bool":{"aws:SecureTransport":false}}}]}
''' % bucket.name
    self.assertTrue(bucket.set_policy(policy, headers={'content-type': 'application/json'}))

    ## the same plain-http request is now rejected
    conn = httplib.HTTPConnection(self.host, self.port)
    headers = {"Host": "%s.s3.amazonaws.com" % bucket.name}
    conn.request('GET', "/%s" % key_name, None, headers)
    response = conn.getresponse()
    self.assertEqual(response.status, 403)
    self.assertEqual(response.reason, 'Forbidden')
def get_file(self, key, compressed_string=False, headers=None):
    """
    Downloads file from AWS S3 and returns a GzipFile object.
    Optionally if compressed_string == True then it can return the
    compressed data as a string.
    """
    if not self.CONN:
        if self.aws_key and self.aws_secret:
            self.CONN = S3Connection(self.aws_key, self.aws_secret)
        else:
            self.CONN = S3Connection()
    if not self.bucket:
        self.bucket = self.CONN.get_bucket('aws-publicdatasets', validate=False)
    item = Key(self.bucket)
    item.key = key
    if compressed_string:
        if headers:
            return item.get_contents_as_string(headers=headers)
        else:
            return item.get_contents_as_string()
    else:
        if headers:
            return gzip.GzipFile(fileobj=StringIO(
                item.get_contents_as_string(headers=headers)))
        else:
            return gzip.GzipFile(
                fileobj=StringIO(item.get_contents_as_string()))
def test_ip_addr_policy(self):
    bucket = self.conn.create_bucket(self.bucket_name)

    policy = '''
{"Version":"2008-10-17","Statement":[{"Sid":"Stmtaaa","Effect":"Deny","Principal":"*","Action":["s3:GetObject"],"Resource":"arn:aws:s3:::%s/*","Condition":{"IpAddress":{"aws:SourceIp":"%s"}}}]}
''' % (bucket.name, self.host)
    self.assertTrue(
        bucket.set_policy(policy, headers={'content-type': 'application/json'}))

    key_name = str(uuid.uuid4())
    key = Key(bucket, key_name)
    key.set_contents_from_string(self.data)
    bucket.list()
    try:
        key.get_contents_as_string()
        self.fail()
    except S3ResponseError as e:
        self.assertEqual(e.status, 404)
        self.assertEqual(e.reason, 'Object Not Found')

    policy = '''
{"Version":"2008-10-17","Statement":[{"Sid":"Stmtaaa","Effect":"Allow","Principal":"*","Action":["s3:GetObject"],"Resource":"arn:aws:s3:::%s/*","Condition":{"IpAddress":{"aws:SourceIp":"%s"}}}]}
''' % (bucket.name, self.host)
    self.assertTrue(
        bucket.set_policy(policy, headers={'content-type': 'application/json'}))
    key.get_contents_as_string()  ## throws nothing
def updateStatusFile(completedFile):
    dst_s3conn = boto.connect_s3(DST_AWS_ACCESS_KEY, DST_AWS_SECRET_KEY)
    dst_bucket = dst_s3conn.get_bucket(DST_PATH[:DST_PATH.index('/')])
    status_file_path = "%s/%s" % (completedFile.rsplit('/', 1)[0], PROCESSING_STATUS_FILE)
    status_file_key = Key(dst_bucket, status_file_path)
    theCompletedFile = completedFile.rsplit('/', 1)[1].strip()
    try:
        if not status_file_key.exists():
            if len(completionListVerify) > 0:
                print("Seeking verification for %s, but this directory has no status file... rerun the scheduler to delete / restart processing. Ending now on this directory." % completedFile)
                return
            print("WARN: failed to retrieve file \"%s\", starting new key." % status_file_path)
            status_file_key.set_contents_from_string(theCompletedFile)
        else:
            status_file_text = bytes(status_file_key.get_contents_as_string()).decode(encoding='UTF-8')
            # everything has been done if we have a completionList object, so validate this
            if len(status_file_text) < 3:
                new_status_file_text = theCompletedFile
            else:
                new_status_file_text = "%s\n%s" % (status_file_text, theCompletedFile)
            status_file_key.set_contents_from_string(new_status_file_text)
            print("Updated status file with latest data, file %s" % theCompletedFile)
    except:
        print("Exception trying to update status file with dir \"%s\", trying again" % completedFile)
        updateStatusFile(completedFile)

    # validate status file contents
    status_file_text = bytes(status_file_key.get_contents_as_string()).decode(encoding='UTF-8')
    if theCompletedFile in status_file_text:
        return
    else:
        print("The file \"%s\" didn't get written to the status file, trying again" % theCompletedFile)
        updateStatusFile(completedFile)
    dst_s3conn.close()
def test_unicode_key_with_slash():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket("foobar")
    key = Key(bucket)
    key.key = "/the-key-unîcode/test"
    key.set_contents_from_string("value")

    key = bucket.get_key("/the-key-unîcode/test")
    key.get_contents_as_string().should.equal(b'value')
def set_data(self, ids, name, value):
    if value is False or value is None:
        return
    cursor = Transaction().cursor
    db_name = cursor.dbname
    s3_conn = S3Connection(
        CONFIG['s3_access_key'], CONFIG['s3_secret_key']
    )
    bucket = s3_conn.get_bucket(CONFIG.options['data_s3_bucket'])
    data = base64.decodestring(value)
    if hashlib:
        digest = hashlib.md5(data).hexdigest()
    else:
        digest = md5.new(data).hexdigest()
    filename = ":".join([db_name, digest])
    collision = 0
    if bucket.get_key(filename):
        key2 = Key(bucket)
        key2.key = filename
        data2 = key2.get_contents_as_string()
        if data != data2:
            cursor.execute('SELECT DISTINCT(collision) FROM ir_attachment '
                'WHERE digest = %s '
                'AND collision != 0 '
                'ORDER BY collision', (digest,))
            collision2 = 0
            for row in cursor.fetchall():
                collision2 = row[0]
                filename = ":".join([
                    db_name, digest + '-' + str(collision2)
                ])
                if bucket.get_key(filename):
                    key2 = Key(bucket)
                    key2.key = filename
                    data2 = key2.get_contents_as_string()
                    if data == data2:
                        collision = collision2
                        break
            if collision == 0:
                collision = collision2 + 1
                filename = ":".join([
                    db_name, digest + '-' + str(collision)
                ])
                key = Key(bucket)
                key.key = filename
                key.set_contents_from_string(data)
    else:
        key = Key(bucket)
        key.key = filename
        key.set_contents_from_string(data)
    self.write(ids, {
        'digest': digest,
        'collision': collision,
    })
def test_empty_key():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket("foobar")
    key = Key(bucket)
    key.key = "the-key"
    key.set_contents_from_string("")

    key = bucket.get_key("the-key")
    key.size.should.equal(0)
    key.get_contents_as_string().should.equal(b'')
def download(self, filepath, format):
    k = Key(self.bucket)
    k.key = filepath
    if k.exists():
        if format == "txt":
            return k.get_contents_as_string()
        elif format == "json":
            return json.loads(k.get_contents_as_string())
    else:
        return None
def download_data(conn, user, bucket_name):
    try:
        bucket = conn.get_bucket(bucket_name)
    except Exception as e:
        print 'Not allowed to access bucket "{}": User "{}" not in the same location as bucket "{}"'.format(
            bucket_name, user, bucket_name)
    else:
        k = Key(bucket)
        k.key = 'foobar'
        print k.get_contents_as_string()
def load(self, key, t=None):
    conn = boto.connect_s3(self.access_key, self.secret_key)
    bucket = conn.get_bucket(self.bucket)
    k = Key(bucket)
    k.key = key
    if t is not None and t == 'obj':
        o = pickle.loads(k.get_contents_as_string())
    else:
        o = k.get_contents_as_string()
    conn.close()
    return o
def test_flush(self):
    path = 'test_flush.txt'
    url = self.get_url(path)
    fl = LOREM + "\n" + LOREM + "\n"
    fl2 = fl + fl

    f = s3open(url, key=self.key, secret=self.secret)
    f.write(fl)
    f.flush()

    k = Key(self.bucket, path)
    self.assertEqual(k.get_contents_as_string(), fl)

    f.write(fl)
    f.close()
    self.assertEqual(k.get_contents_as_string(), fl2)
def s3_test():
    # conn = boto.connect_s3()
    conn = S3Connection()
    # bucket = conn.create_bucket('happybirthdaysohrob2')
    # k = Key(bucket)
    k = connection.Key(conn.get_bucket('happybirthdaysohrob', validate=False))
    k.name = 'hello/world'
    k.key = 'foobar'
    k.set_contents_from_string('Ceci n\'est pas une pipe')

    b = conn.get_bucket('happybirthdaysohrob')
    k = Key(b)
    k.key = 'foobar'
    print(k.get_contents_as_string())
    return(k.get_contents_as_string())
def assert_updated_metadata(self, bucket, key_name):
    key = Key(bucket, key_name)
    key.get_contents_as_string()
    # unchanged
    self.assertEqual(key.get_metadata("uid"), "0")
    # updated
    self.assertEqual(key.content_disposition, 'attachment; filename="newname.txt"')
    self.assertEqual(key.get_metadata("mtime"), "2222222222")
    # removed
    self.assertEqual(key.content_encoding, None)
    self.assertEqual(key.get_metadata("with-hypen"), None)
    # inserted
    self.assertEqual(key.get_metadata("new-entry"), "NEW")
def s3_download(key):
    conn = boto.connect_s3(app.config["S3_KEY"], app.config["S3_SECRET"])
    bucket = conn.get_bucket(app.config["S3_BUCKET"])
    k = Key(bucket)
    k.key = key
    return k.get_contents_as_string()
def _read_s3(self):
    if self.snapshots:
        return

    conn = S3Connection(self.aws_access_key_id, self.aws_secret_access_key)
    bucket = conn.get_bucket(self.s3_bucket, validate=False)
    self.snapshots = []

    prefix = self.base_path
    if not self.base_path.endswith('/'):
        prefix = "{!s}/".format(self.base_path)
    snap_paths = [snap.name for snap in bucket.list(
        prefix=prefix, delimiter='/')]
    # Remove the root dir from the list since it won't have a manifest file.
    snap_paths = [x for x in snap_paths if x != prefix]

    for snap_path in snap_paths:
        mkey = Key(bucket)
        manifest_path = '/'.join([snap_path, 'manifest.json'])
        mkey.key = manifest_path
        try:
            manifest_data = mkey.get_contents_as_string()
        except S3ResponseError as e:
            # manifest.json not found.
            logging.warn("Response: {!r} manifest_path: {!r}".format(
                e.message, manifest_path))
            continue
        try:
            self.snapshots.append(
                Snapshot.load_manifest_file(manifest_data, self.s3_bucket))
        except Exception as e:
            # Invalid json format.
            logging.error("Parsing manifest.json failed. {!r}".format(
                e.message))
            continue

    self.snapshots = sorted(self.snapshots, reverse=True)
def test_get_object_acl(self):
    bucket = self.conn.create_bucket(self.bucket_name)
    k = Key(bucket)
    k.key = self.key_name
    k.set_contents_from_string(self.data)
    self.assertEqual(k.get_contents_as_string(), self.data)
    self.assertEqual(k.get_acl().to_xml(), self.defaultAcl(self.user1))
def read_object_content(self, obj_id):
    if not self.conn:
        self.do_connect()
    k = Key(bucket=self.bucket, name=obj_id)
    return k.get_contents_as_string()
def read_chunk(chunk_path):
    """
    Get a chunk of data from S3.

    Return the data on success.
    Return None on error, and log an exception.
    """
    global AWS_BUCKET

    bucket = get_bucket(AWS_BUCKET)
    if bucket is None:
        log.error("Failed to get bucket '%s'" % AWS_BUCKET)
        return None

    # replace / with \x2f
    chunk_path = chunk_path.replace("/", r"\x2f")

    k = Key(bucket)
    k.key = chunk_path

    data = None
    try:
        data = k.get_contents_as_string()
    except Exception, e:
        log.error("Failed to read '%s'" % chunk_path)
        log.exception(e)

    return data
def read_eif_from_s3(conn, bucket_name, eif_filename):
    bucket = conn.get_bucket(bucket_name)
    key = Key(bucket)
    key.key = eif_filename
    json_input = key.get_contents_as_string()
    data = json.loads(json_input)
    return data
def getFile(self, fileName):
    print("Downloading {} from Amazon S3 bucket {}".format(
        fileName, self.bucketName))
    k = Key(self.bucket)
    k.key = fileName
    return k.get_contents_as_string()
def _read_chunk(self, chunkhash):
    chunkhash = hexlify(chunkhash)
    k = Key(self.b)
    k.key = 'data/' + chunkhash
    data = k.get_contents_as_string()
    self.status.inc_t_chunks_d(len(data))
    return data
def s3test(args):
    KEY = args['key']
    SECRET = args['secret']
    HOST = args['host']
    BUCKET = args['bucket']
    ENV = args['env']

    logging.info('Test started for env %s', ENV)
    conn = boto.connect_s3(
        aws_access_key_id=KEY,
        aws_secret_access_key=SECRET,
        host=HOST,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )

    logging.info('Creating bucket %s', BUCKET)
    bucket = conn.create_bucket(BUCKET)
    logging.info('Bucket created for env %s', ENV)

    k = Key(bucket)
    k.key = 's3-canary'

    logging.info('Writing file')
    k.set_contents_from_string('This is a test of S3')
    logging.info('Write done for env %s', ENV)

    time.sleep(3)

    logging.info('Reading back the file for env %s', ENV)
    if k.get_contents_as_string() != "This is a test of S3":
        raise CustomError('Failed to read back the created file!')
    logging.info('File has been read back for env %s', ENV)

    logging.info('Deleting bucket for env %s', ENV)
    bucket.delete_key(k)
    logging.info('Bucket deleted for env %s', ENV)
def get_data(self, name):
    """
    Get the data from S3 instead of the filesystem. The filename is built
    as '<DBNAME>/<FILENAME>' in the given S3 bucket.

    :param name: name of the field
    :return: buffer of the file binary
    """
    s3_conn = S3Connection(
        CONFIG['s3_access_key'], CONFIG['s3_secret_key']
    )
    bucket = s3_conn.get_bucket(CONFIG.options['data_s3_bucket'])
    db_name = Transaction().cursor.dbname
    format_ = Transaction().context.pop(
        '%s.%s' % (self.__name__, name), ''
    )
    value = None
    if name == 'data_size' or format_ == 'size':
        value = 0
    if self.digest:
        filename = self.digest
        if self.collision:
            filename = filename + '-' + str(self.collision)
        filename = "/".join([db_name, filename])
        if name == 'data_size' or format_ == 'size':
            key = bucket.get_key(filename)
            value = key.size
        else:
            k = Key(bucket)
            k.key = filename
            value = buffer(k.get_contents_as_string())
    return value
def s3open(self, filepath):
    bucketname, keyname = decompose_s3_url(filepath)
    bucket = self.s3conn.get_bucket(bucketname)
    key = Key(bucket)
    key.key = keyname
    as_string = key.get_contents_as_string()
    return as_string
def call_services(keyname):
    global BUCKET
    key = BUCKET.get_key(keyname)
    if key is None:
        return
    eventfile = "data_processing/%s_%s_%s" % (
        boto.utils.get_instance_metadata()['local-hostname'],
        str(time.time()),
        str(int(random.randint(0, 100))))
    try:
        key.copy('nlp-data', eventfile)
        key.delete()
    except S3ResponseError as e:
        print e
        print 'EVENT FILE %s NOT FOUND!' % eventfile
        return
    except KeyboardInterrupt:
        sys.exit()
    print 'STARTING EVENT FILE %s' % eventfile
    k = Key(BUCKET)
    k.key = eventfile
    print k.key
    map(process_file, k.get_contents_as_string().split('\n'))
    print 'EVENT FILE %s COMPLETE' % eventfile
    k.delete()
def test_multiple_versions_of_one_file(self):
    """
    test that we get multiple versions of a file
    """
    key_names = [
        "test-key1",
        "test-key1",
        "test-key1",
    ]

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    bucket.configure_versioning(True)
    self.assertTrue(bucket is not None)
    _clear_keys(bucket)

    keys_with_data = _create_some_keys_with_data(bucket, key_names)
    keys_with_data_dict = \
        dict([(key.version_id, data, ) for (key, data) in keys_with_data])

    result = bucket.get_all_versions()
    self.assertEqual(len(result), len(key_names))
    for result_key in result:
        read_key = Key(bucket)
        read_key.name = result_key.name
        read_key_data = read_key.get_contents_as_string(
            version_id=result_key.version_id)
        self.assertEqual(read_key_data,
                         keys_with_data_dict[result_key.version_id],
                         result_key.name)

    _clear_bucket(self._s3_connection, bucket)
def load_json(key_name):
    """ Get contents of key as json """
    key = Key(_bucket, key_name)
    contents = key.get_contents_as_string()
    return json.loads(contents)
def read(self, key):
    """get content from a location designated by *key* in a public
    bucket (default) or private bucket (if private=True)"""
    bucket = self.public_bucket
    k = Key(bucket)
    k.key = key
    return k.get_contents_as_string()
def get(cls, ids, model, name, values=None):
    '''
    Get the File from Amazon S3

    :param ids: List of integer ids of records
    :param model: Model object
    :param name: Name passed to function field
    :param values: Apparently unused
    :return: a dictionary with ids as key and values as value
    '''
    result = {}
    bucket = cls.get_bucket()
    for record in model.browse(ids):
        filename = cls.get_filename(model, record.id)
        if name == 'data_size':
            try:
                key = bucket.get_key(filename)
                value = key.size
            except S3ResponseError, exc:
                logging.error(exc)
                value = 0
        else:
            try:
                key = Key(bucket)
                key.key = filename
                value = bytearray(key.get_contents_as_string())
            except S3ResponseError, exc:
                logging.error(exc)
                value = None
        result[record.id] = value
    return result
def get_all_files(self):
    try:
        rs = self.s3.get_all_buckets()
        files = []
        bucketmap = dict()
        for b in rs:
            bucket_dic, map_path = self.get_files_from_bucket(b)
            if map_path != '':
                # print map_path
                cipher = AES.new(process_cipherkey('password'), AES.MODE_ECB, CYPHER_IV)
                k = Key(b)
                k.key = map_path
                data = cipher.decrypt(k.get_contents_as_string())
                for line in data.split('\n'):
                    # print line
                    if line == 'bucket to dir':
                        continue
                    elif line == 'dir to bucket':
                        break
                    else:
                        dirs = line.split()
                        bucketmap[dirs[0]] = eval(dirs[1])
            for prefix in bucket_dic:
                filename = b.name + '/' + prefix + '_' + bucket_dic[prefix][0] + '_' + str(bucket_dic[prefix][1])
                files.append(filename)
        return bucketmap, files
    except IOError:
        print 'Cannot get s3 files'
def main(argv):
    ## PARAM OVERRIDES
    KurmaAWSTestLib.GLOBAL_DEBUG = 1
    bucket_name = 'readafterwrite003kurmaeu'

    ret = KurmaAWSTestLib.fetchArgs(argv)
    if ret == -1:
        sys.exit(2)

    userObj = boto.s3.connect_to_region(
        'eu-west-1',
        aws_access_key_id=KurmaAWSTestLib.user_profiles[0]['access'],
        aws_secret_access_key=KurmaAWSTestLib.user_profiles[0]['secret'],
        calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = userObj.get_bucket(bucket_name)

    j = 0
    while j < 10000:
        j = j + 1
        k = Key(bucket)
        keystring = 'testobj'
        k.key = keystring
        try:
            mstr = k.get_contents_as_string()
            print("Read " + keystring + ":" + mstr + " at: " + str(datetime.now()))
        except:
            print("---------- Could not find " + keystring + " at: " + str(datetime.now()))
            pass
    return
def read(self, key):
    """get content from a location designated by *key* in a public
    bucket (default) or private bucket (if private=True)"""
    bucket = self.public_bucket
    k = Key(bucket)
    k.key = key
    return k.get_contents_as_string()
def reducer2(self, key, value):
    mass = 0.
    num_nodes = 0
    # conn = boto.connect_s3()
    aws_access_key_id = get_jobconf_value('aws_access_key_id')
    aws_secret_access_key = get_jobconf_value('aws_secret_access_key')
    conn = S3Connection(aws_access_key_id, aws_secret_access_key)
    mybucket = conn.get_bucket('ucb-mids-mls-juanjocarin')
    k = Key(mybucket)
    mykey = 'num_mass{}'.format(self.iteration)
    k.key = mykey
    data = k.get_contents_as_string()
    data = data.strip().split('\t')
    num_nodes = num_nodes + int(data[0])
    mass = mass + float(data[1])
    for v in value:
        ## 1) Its adjacency list
        sinks = v[0]
        ## 2) The corrected PR
        ## PR = alpha * PR + alpha * m / |G| + (1-alpha) * (1/|G|)
        PR = self.alpha * (v[1] + mass / num_nodes)
        PR = PR + (1 - self.alpha) / num_nodes
        yield key, [sinks, PR]
def get_object(self, name):
    k = Key(self.bucket, name)
    start_time = time.time()
    buf = k.get_contents_as_string()
    duration = time.time() - start_time
    # print "%s: %f" % (name, duration)
    return duration
def update_projects():
    conn = S3Connection(AWS_KEY, AWS_SECRET)
    bucket = conn.get_bucket(BUCKET)

    pj_list = Key(bucket)
    pj_list.key = 'projects.json'
    project_list = json.loads(pj_list.get_contents_as_string())
    pj_list.close()

    details = []
    for project_url in project_list:
        try:
            pj_details = update_project(project_url)
        except IOError:
            return 'Github is throttling. Just gonna try again after limit is reset.'
        if pj_details:
            details.append(pj_details)

    pj_details = Key(bucket)
    pj_details.key = 'project_details.json'
    pj_details.set_contents_from_string(json.dumps(details))
    pj_details.set_metadata('Content-Type', 'application/json')
    pj_details.set_acl('public-read')
    pj_details.close()

    people_list = Key(bucket)
    people_list.key = 'people.json'
    people_list.set_contents_from_string(json.dumps(get_people_totals(details)))
    people_list.set_metadata('Content-Type', 'application/json')
    people_list.set_acl('public-read')
    people_list.close()

    org_list = Key(bucket)
    org_list.key = 'organizations.json'
    org_list.set_contents_from_string(json.dumps(get_org_totals(details)))
    org_list.set_metadata('Content-Type', 'application/json')
    org_list.set_acl('public-read')
    org_list.close()

    return 'Updated'
def download_database():
    bucket_name = os.environ['DYNDNS_BUCKET']
    connection = connect_s3(calling_format=OrdinaryCallingFormat())
    bucket = connection.get_bucket(bucket_name)
    bucket_data = Key(bucket)
    bucket_data.key = os.environ['DYNDNS_DATABASE']
    return bucket_data.get_contents_as_string()
def _getImageFromS3(bucket, name):
    num_retries = 0
    max_retries = 5
    while True:
        try:
            key = Key(bucket, name)
            data = key.get_contents_as_string()
            key.close()
            return data
        except Exception as e:
            logs.warning("S3 Exception: %s" % e)
            num_retries += 1
            if num_retries > max_retries:
                msg = "Unable to connect to S3 after %d retries (%s)" % (
                    max_retries, self.__class__.__name__)
                logs.warning(msg)
                raise Exception(msg)
            logs.info("Retrying (%s)" % (num_retries))
            time.sleep(0.5)
        finally:
            try:
                if not key.closed:
                    key.close()
            except Exception:
                logs.warning("Error closing key")
def read_chunk(chunk_path):
    """
    Get a chunk of data from S3.

    Return the data on success.
    Return None on error, and log an exception.
    """
    global AWS_BUCKET

    bucket = get_bucket(AWS_BUCKET)
    if bucket is None:
        log.error("Failed to get bucket '%s'" % AWS_BUCKET)
        return None

    # replace / with \x2f
    chunk_path = chunk_path.replace("/", r"\x2f")

    k = Key(bucket)
    k.key = chunk_path

    data = None
    try:
        data = k.get_contents_as_string()
    except Exception, e:
        log.error("Failed to read '%s'" % chunk_path)
        log.exception(e)

    return data
def execute(self, context, obj):
    connection = S3Connection()
    bucket = Bucket(connection=connection, name=context['bucket'])
    key1 = Key(bucket=bucket, name=context['name'])
    key2 = Key(bucket=bucket, name=context['name'] + '.encrypted')
    key2.set_contents_from_string(key1.get_contents_as_string())
    return 'done'
def _read_s3(self):
    if self.snapshots:
        return

    conn = S3Connection(self.aws_access_key_id, self.aws_secret_access_key,
                        host=self.s3_connection_host)
    bucket = conn.get_bucket(self.s3_bucket, validate=False)
    self.snapshots = []

    prefix = self.base_path
    if not self.base_path.endswith('/'):
        prefix = "{!s}/".format(self.base_path)
    snap_paths = [
        snap.name for snap in bucket.list(prefix=prefix, delimiter='/')
    ]
    # Remove the root dir from the list since it won't have a manifest file.
    snap_paths = [x for x in snap_paths if x != prefix]

    for snap_path in snap_paths:
        mkey = Key(bucket)
        manifest_path = '/'.join([snap_path, 'manifest.json'])
        mkey.key = manifest_path
        try:
            manifest_data = mkey.get_contents_as_string()
        except S3ResponseError as e:
            # manifest.json not found.
            logging.warn("Response: {!r} manifest_path: {!r}".format(
                e.message, manifest_path))
            continue
        try:
            self.snapshots.append(
                Snapshot.load_manifest_file(manifest_data, self.s3_bucket))
        except Exception as e:
            # Invalid json format.
            logging.error("Parsing manifest.json failed. {!r}".format(
                e.message))
            continue

    self.snapshots = sorted(self.snapshots, reverse=True)
def insertRaw(self, location):
    # this is the main function to insert new text into the system...
    self.location = location
    k = Key(self.bucket)
    k.key = location
    self.raw = k.get_contents_as_string()
    # these already split files should be small enough to store in the RAM of the computer - so this should be safe
    # If I begin dealing with significantly larger files - then stream text instead
    self.sents = self.sent_tokenizer.tokenize(self.raw)
    for sent in self.sents:
        sentReturn = {}  # var to hold the output of one sentence worth of input
        tokenized = self.pos_tag(self.tokenize(sent))
        NPs = self.NPChunker.parse(tokenized)
        relationBounds = self.relationIdentify.returnRelationshipBounds(tokenized)
        sentReturn['relations'] = self.NPAssembler.returnRelationships(tokenized, relationBounds, NPs)
        sentReturn['sentence'] = sent
        self.insertRelations(sentReturn)
    return True
def get_data(self, ids, name):
    res = {}
    db_name = Transaction().cursor.dbname
    s3_conn = S3Connection(
        CONFIG['s3_access_key'], CONFIG['s3_secret_key']
    )
    for attachment in self.browse(ids):
        value = False
        if name == 'data_size':
            value = 0
        if attachment.digest:
            filename = attachment.digest
            if attachment.collision:
                filename = filename + '-' + str(attachment.collision)
            filename = ":".join([db_name, filename])
            bucket = s3_conn.get_bucket(CONFIG.options['data_s3_bucket'])
            if name == 'data_size':
                key = bucket.get_key(filename)
                value = key.size
            else:
                k = Key(bucket)
                k.key = filename
                value = base64.encodestring(k.get_contents_as_string())
        res[attachment.id] = value
    return res
def track_download_meta_init_test(self):
    self.conn.sync()
    bucket = self.conn.get_bucket(self.name)

    key = BotoKey(self.boto_bucket)
    key.key = 'download'
    key.set_contents_from_string('download')

    key = Key(bucket, 'download')
    key.get_contents_as_string()

    meta = mimicdb.backend.hgetall(tpl.key % (self.name, 'download'))

    assert int(meta['size']) == len('download')
    assert meta['md5'] == hashlib.md5('download').hexdigest()
def test_key_with_strings(self):
    """
    test simple key 'from_string' and 'as_string' functions
    """
    key_name = "test-key"
    test_string = os.urandom(1024)

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    # self.assertFalse(write_key.exists())

    # upload some data
    write_key.set_contents_from_string(test_string)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data
    returned_string = read_key.get_contents_as_string()
    self.assertEqual(returned_string, test_string,
                     (len(returned_string), len(test_string)))

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket.name)
def load(self, key):
    """
    Load key from the backend.

    :param key: the key for the object to load
    """
    if key is None:
        raise ValueError(u'Key cannot be None')
    k = Key(self.bucket)
    k.key = key
    try:
        data = k.get_contents_as_string()
    except S3ResponseError as e:
        # raised if the key can't be found
        return None
    except Exception as e:
        raise e
        return None
    else:
        return data