def _connect(self, host, port, user, password):
    """Open the Accumulo connection backing the StaticFile Store.

    On failure the error is logged with traceback and re-raised as a
    plain Exception carrying the same message.
    """
    try:
        self.__connection = Accumulo(host, port, user, password)
        self.__log.debug('Connected to StaticFile Store')
    except Exception as err:
        # Build the message once so the log line and the raised error match.
        detail = 'Error while connecting to StaticFile Store: %s' % str(err)
        self.__log.exception(detail)
        raise Exception(detail)
def printTableDB(table):
    """Dump every entry of *table* from the local Accumulo instance to stdout."""
    db = Accumulo(host="localhost", port=50096, user="******", password="******")
    # One line per cell: row, column family/qualifier/visibility, timestamp, value.
    for rec in db.scan(table):
        print(rec.row, rec.cf, rec.cq, rec.cv, rec.ts, rec.val)
    db.close()
def exportJsonDB(json_data, frameNum):
    """Store one frame's JSON payload in Accumulo.

    The video name (from the payload's metadata) names the table; the frame
    number names the row.  Base64 images go in their own columns (cf2/cf3),
    the remaining metadata is re-serialized as JSON into cf1.
    """
    conn = Accumulo(host="localhost", port=50096, user="******", password="******")

    # Back into a dict so we can pull fields out and strip the images.
    parsed = json.loads(json_data)

    # Table name is the video name; dots are replaced since they are not
    # usable in table names, and the name is ASCII-normalized.
    table = parsed['videoMetadata']['videoName']
    table = table.replace('.', '_').encode('ascii', 'ignore')
    if not conn.table_exists(table):
        conn.create_table(table)

    mut = Mutation("row_%d" % frameNum)  # row per frame
    # Frame image is stored apart from the metadata.
    mut.put(cf="cf2", cq="cq2", val=parsed['imageBase64'])
    if 'LabeledImage' in parsed:
        # Labeled image, when present, also gets its own column.
        mut.put(cf="cf3", cq="cq3", val=parsed['LabeledImage'])
    # Drop the bulky base64 blobs before re-serializing the metadata.
    parsed.pop('LabeledImage', None)
    parsed.pop('imageBase64', None)
    mut.put(cf="cf1", cq="cq1", val=json.dumps(parsed))

    conn.write(table, mut)
    conn.close()
def direct(config_path, namespace_string):
    """Yield a live Accumulo connection scoped to *namespace_string*.

    Generator fixture: inside a yakonfig kvlayer configuration it opens a
    connection (credentials from config, host/port hard-coded), yields it,
    and on resumption deletes every table whose name matches the namespace.
    """
    with yakonfig.defaulted_config(
            [kvlayer], filename=config_path,
            params={"app_name": "kvlayer", "namespace": namespace_string}):
        config = yakonfig.get_global_config("kvlayer")
        # NOTE(review): host and port are hard-coded test values; only the
        # credentials come from the config.
        conn = Accumulo(host="test-accumulo-1.diffeo.com", port=50096,
                        user=config["username"],
                        password=config["password"])
        yield conn
        # Teardown: drop every table created under this namespace.
        tables = conn.list_tables()
        for table in tables:
            if re.search(namespace_string, table):
                conn.delete_table(table)
def __connectToAccumulo(self, host, port, user, password):
    """Connect to the CertStore's Accumulo backend.

    Raises EzRPCertStoreException (after logging with traceback) when the
    connection cannot be established.
    """
    try:
        self.__dbConnection = Accumulo(host, port, user, password)
        self.__logger.debug('Successfully connected to CertStore')
    except Exception as ex:
        # Same text goes to the log and into the raised exception.
        why = 'Error in connecting to CertStore: %s' % str(ex)
        self.__logger.exception(why)
        raise EzRPCertStoreException(why)
def __connectToAccumulo(self, host, port, user, password):
    """Connect to the CertStore's Accumulo backend, logging progress at INFO.

    Raises EzRPCertStoreException (after logging with traceback) on failure.
    """
    try:
        self.__logger.info("connecting to CertStore ...")
        self.__dbConnection = Accumulo(host, port, user, password)
        self.__logger.info("Successfully connected to CertStore")
    except Exception as ex:
        # Single message shared by the log record and the raised exception.
        reason = "Error in connecting to CertStore: %s" % str(ex)
        self.__logger.exception(reason)
        raise EzRPCertStoreException(reason)
def __init__(self,
             host="localhost",
             port=42424,
             user="******",
             password="******",
             num_trials=100,
             filename='default_file.txt',
             seed=None,
             signer_ids=test_ids,
             pki=test_pki):
    """Record the run configuration and eagerly open the Accumulo connection."""
    # Connecting up front makes a bad host/port fail at construction time.
    self.conn = Accumulo(host=host, port=port, user=user, password=password)
    # Plain attribute copies of the remaining settings.
    self.filename = filename
    self.num_trials = num_trials
    self.pki = pki
    self.seed = seed
    self.signer_ids = signer_ids
def new(cls, elems, lbound, rbound, coin=BaseCoin(),
        conn_info=ConnInfo('localhost', 42424, 'root', 'secret'),
        table='__ADS_metadata___', elemclass=IntElem):
    """ Create a new skiplist that stores all of its data inside
        an Accumulo instance.

        Arguments:
        cls - the class implementing this class method
        elems - the elements to create the skiplist over
        lbound, rbound - the left and right boundary elements of the list
        coin - the source of randomness to use
               (see pace.ads.skiplist.coin)
        conn_info - how to connect to the Accumulo instance being used,
                    or None to use an in-memory FakeConnection
                    (testing/debug)
        table - the name of the table to store the ADS in
        elemclass - the class to use to store the elements in the
                    skiplist
    """
    sl = cls(None, lbound, rbound, coin)

    if conn_info is not None:
        # For connecting to a live Accumulo instance.
        # (Fix: removed the dead `host, port, user, password = conn_info`
        # unpacking — the values were never used; the connection is built
        # directly from conn_info's attributes.)
        conn = Accumulo(host=conn_info.host,
                        port=conn_info.port,
                        user=conn_info.user,
                        password=conn_info.password)
    else:
        # For testing/debug
        conn = FakeConnection()

    sl.conn = conn
    sl.table = table
    sl.elemclass = elemclass

    if not conn.table_exists(table):
        conn.create_table(table)

    # Seed the list with its two boundary sentinel nodes (left -> right),
    # then insert the payload elements one at a time.
    right = cls.nodeclass.newnode(sl, None, None, rbound, True)
    left = cls.nodeclass.newnode(sl, None, right, lbound, True)
    sl.root = left

    for elem in elems:
        sl.insert(elem)

    return sl
# the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo, Mutation, Range import settings table = "pythontest" conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) if conn.table_exists(table): conn.delete_table(table) conn.create_table(table) wr = conn.create_batch_writer(table) print "Ingesting some data ..." for num in range(1, 100): label = '%03d'%num mut = Mutation('r_%s'%label) mut.put(cf='cf_%s'%label, cq='cq1', val='value_%s'%label) mut.put(cf='cf_%s'%label, cq='cq2', val='value_%s'%label) wr.add_mutation(mut) wr.close()
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo, Mutation, Range from pyaccumulo.iterators import * from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope from examples.util import hashcode import hashlib, re import settings import sys conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = sys.argv[1] if not conn.table_exists(table): print "Table '%s' does not exist."%table sys.exit(1) search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3] if len(search_terms) < 2: print "More than one term of length > 3 is required for this example" sys.exit(1) for e in conn.batch_scan(table, iterators=[IndexedDocIterator(priority=21, terms=search_terms)]): print e.val conn.close()
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo, Mutation, Range from pyaccumulo.iterators import * from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope from examples.util import hashcode import hashlib, re import settings conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = "regexes" if conn.table_exists(table): conn.delete_table(table) conn.create_table(table) wr = conn.create_batch_writer(table) license_file = "LICENSE" linenum = 0 with file(license_file) as infile: for line in infile: linenum += 1
# NOTE(review): preserved byte-for-byte — the collapsed layout makes the
# placement of `wr.add_mutation(mut)` (inside vs. after the batch_scan loop)
# ambiguous, so no reformatting is attempted.
#
# randtask(q, state, x): scans row `q`, column family `state`, counting down
# from a random n in [0, x]; returns the cq of the entry it stops on (or the
# last one seen), or None for an empty scan — i.e. a random-ish task pick.
# The trailing script section ("-c") builds a queue row "<Q>:<argv[2]>" and
# puts one QUEUED cell per genome md5 found in the table.
def randtask(q,state,x): n=random.randint(0,x) entry=None for entry in conn.scan(table,scanrange=Range(srow=q,erow=q),cols=[[state]]): if n == 0: break else: n=n-1 if entry is None: return None else: return entry.cq conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = settings.TABLE if sys.argv[1] == "-c": print "create" wr = conn.create_batch_writer(table) i=0 q="%s:%s"%(Q,sys.argv[2]) mut = Mutation(q) for entry in conn.batch_scan(table,cols=[["Genome","md5"]],numthreads=10): genome=entry.row if i%1000 == 0: print entry.row mut.put(cf=QUEUED,cq=genome) i=i+1 wr.add_mutation(mut)
# NOTE(review): this chunk begins mid-function — the opening lines are the
# tail of an indexing helper (flushing a Mutation to the writer every 1000
# updates, then flushing any remainder). Preserved byte-for-byte because the
# enclosing def is not visible in this excerpt.
#
# Script section: reads the target table and input directories from argv,
# creates the table if needed, then walks every directory tree and indexes
# each file's tokens (sharded by the file's uuid) via write_mutations().
for tok in tokens: m.put(tok, cq=uuid, val="") if len(m.updates) > 1000: writer.add_mutation(m) m = Mutation(shard) if len(m.updates) > 0: writer.add_mutation(m) try: table = sys.argv[1] input_dirs = sys.argv[2:] except: usage() conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) if not conn.table_exists(table): print "Creating table: %s"%table conn.create_table(table) wr = conn.create_batch_writer(table) for indir in input_dirs: for root, subFolders, files in os.walk(indir): for filename in files: filePath = os.path.join(root, filename) print "indexing file %s"%filePath uuid = get_uuid(filePath) with open( filePath, 'r' ) as f: write_mutations(wr, get_shard(uuid), uuid, filePath, get_tokens(f))
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo, Mutation, Range from pyaccumulo.iterators import * from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope from examples.util import hashcode import hashlib, re import settings import sys conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = sys.argv[1] if not conn.table_exists(table): print "Table '%s' does not exist."%table sys.exit(1) search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3] if len(search_terms) < 2: print "More than one term of length > 3 is required for this example" sys.exit(1) uuids = [] for e in conn.batch_scan(table, scanranges=[Range(srow="s", erow="t")], iterators=[IntersectingIterator(priority=21, terms=search_terms)]): uuids.append(e.cq)
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo from pyaccumulo.iterators import * import settings import sys conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = sys.argv[1] if not conn.table_exists(table): print("Table '%s' does not exist." % table) sys.exit(1) search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3] if len(search_terms) < 2: print("More than one term of length > 3 is required for this example") sys.exit(1) for e in conn.batch_scan( table, iterators=[IndexedDocIterator(priority=21,
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo from pyaccumulo.objects import Range from pyaccumulo.iterators import * import settings import sys conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = sys.argv[1] if not conn.table_exists(table): print("Table '%s' does not exist." % table) sys.exit(1) search_terms = [term.lower() for term in sys.argv[2:] if len(term) > 3] if len(search_terms) < 2: print("More than one term of length > 3 is required for this example") sys.exit(1) uuids = [] for e in conn.batch_scan(
# NOTE(review): this class reached review with its original line structure
# collapsed; it is preserved byte-for-byte below rather than reformatted,
# because several of the visible line breaks fall inside string literals and
# docstrings, so statement boundaries cannot be reconstructed unambiguously.
#
# Benchmarker drives signing/verification benchmarks against one Accumulo
# connection: run_test() signs+verifies one generated table; run_benchmarks /
# run_fancy_benchmarks sweep table sizes per signature class (the latter
# writing a CSV); full_benchmark / run_full_benchmarks compare against an
# unsigned write/read baseline; fastfail_benchmark / run_fastfail_benchmarks
# measure scan overhead from varied visibility fields; id_test / table_test /
# location_test exercise signer-ID, table-inclusive, and config-based modes.
#
# Issues spotted (TODO confirm against the original formatting):
#   * location_test() builds its table prefix from a name `loc` that is
#     neither a parameter nor assigned locally — NameError when called.
#   * In run_test(), `signer` and `sout` are assigned on the write path but
#     appear to be used unconditionally — verify the intended indentation
#     for the write=False case.
#   * time.clock() is used throughout — removed in Python 3.8; this code is
#     Python 2 (print statements).
class Benchmarker(object): BENCHMARKS = [(100, 10), (500, 50), (1000, 100), (5000, 500), (10000, 1000)] FANCY_BENCHMARKS = [(2**i, 2**(i - 1)) for i in range(2, 14)] def __init__(self, host="localhost", port=42424, user="******", password="******", num_trials=100, filename='default_file.txt', seed=None, signer_ids=test_ids, pki=test_pki): self.conn = Accumulo(host=host, port=port, user=user, password=password) self.num_trials = num_trials self.filename = filename self.seed = seed self.signer_ids = signer_ids self.pki = pki def run_test(self, table="test_table_5", default_vis="default", num_entries=100, num_rows=15, signClassName='RSASSA_PKCS1-v1_5', write=True, benchmark=False): """ Runs one iteration of the signature test. If benchmark is set to True, returns the lengths of time it took to sign all the entries and the time it took to verify all the entries. """ table = sanitize(table) seed = self.seed if signClassName == 'ALL': for signClass in ALL_SIGNATURES: self.run_test(table + '_' + sanitize(signClass.name), default_vis, num_entries, num_rows, signClass.name, write, benchmark) return signClass = SIGNATURE_FUNCTIONS[signClassName] pubkey, privkey = signClass.test_keys() if write: signer = AccumuloSigner(privkey, sig_f=signClass) if not seed: # set a new seed if one wasn't specified seed = str(time.time()) generate_data(self.filename, seed, default_vis=default_vis, num_entries=num_entries, num_rows=num_rows) sout = write_and_sign_data(self.filename, self.conn, table, signer, benchmark) vout = verify_data(self.conn, table, pubkey, benchmark) if benchmark: sign_start, sign_end = sout verif_success, verif_start, verif_end = vout print "Time taken to sign: %s" % str(sign_end - sign_start) print "Time taken to verify: %s" % str(verif_end - verif_start) return sign_end - sign_start, verif_end - verif_start def run_benchmarks(self, table_prefix="benchmarking", default_vis="default"): """ Benchmarks each different signature class on a variety of table sizes, measuring 
the time taken to sign & verify all entries of each table size with each signature algorithm. """ table_prefix = sanitize(table_prefix) for entries, rows in self.BENCHMARKS: print "===============================================================" print "Current benchmark: %d entries over %d rows" % (entries, rows) print "===============================================================" print for signClass in SUPPORTED_SIGNATURES: table = "%s_%s_e%d_r%d" % ( table_prefix, sanitize(signClass.name), entries, rows) print "Benchmarking %s" % (sanitize(signClass.name)) self.run_test(table, default_vis, entries, rows, signClass.name, write=True, benchmark=True) print def run_fancy_benchmarks(self, table_prefix="benchmarking", default_vis="default", resfile="benchmark_results.csv"): """ Runs more benchmarks than run_benchmarks(), then writes the output to a file. """ table_prefix = sanitize(table_prefix) results = [] for entries, rows in self.FANCY_BENCHMARKS: print "===============================================================" print "Current benchmark: %d entries over %d rows" % (entries, rows) print "===============================================================" print classres = [] for signClass in SUPPORTED_SIGNATURES: table = "%s_%s_e%d_r%d" % ( table_prefix, sanitize(signClass.name), entries, rows) print "Benchmarking %s" % (sanitize(signClass.name)) sign_time, verif_time = self.run_test(table, default_vis, entries, rows, signClass.name, write=True, benchmark=True) classres.append((signClass.name, sign_time, verif_time)) print results.append((entries, classres)) print 'time to write to file' with open(resfile, 'w') as f: f.write('num entries,name,sign time,verification time\n') for num_entries, classres in results: for name, stime, vtime in classres: f.write(','.join( [str(num_entries), name, str(stime), str(vtime)])) f.write('\n') print 'wrote to file' def full_benchmark(self, table_prefix="full_benchmarking", default_vis="default", signClass=None, 
num_entries=10000, num_rows=1000): """ Either run a single benchmark (sign & verify) on one signature class, or run it with no signing class (just write & read) to get a baseline time. """ table_prefix = sanitize(table_prefix) conn = self.conn if signClass: table = table_prefix + '_' + sanitize(signClass.name) else: table = table_prefix + '_baseline' if signClass: pubkey, privkey = signClass.test_keys() signer = AccumuloSigner(privkey, sig_f=signClass) start_time = time.clock() write_and_sign_data(self.filename, conn, table, signer, benchmark=False) end_time = time.clock() total_sign_time = end_time - start_time start_time = time.clock() verify_data(conn, table, pubkey, benchmark=False) end_time = time.clock() total_verif_time = end_time - start_time else: start_time = time.clock() write_data(self.filename, conn, table) end_time = time.clock() total_sign_time = end_time - start_time count = 0 start_time = time.clock() for entry in conn.scan(table): count += 1 end_time = time.clock() total_verif_time = end_time - start_time return (total_sign_time, total_verif_time) def run_full_benchmarks(self, table_prefix="full_benchmarking", default_vis="default", num_entries=10000, num_rows=1000, outfile='full_benchmark_out.csv'): """ Benchmark each signing algorithm, writing the results to a file, and comparing them to a baseline write & read with no signatures. 
""" table_prefix = sanitize(table_prefix) n = generate_data(self.filename, self.seed, default_vis=default_vis, num_entries=num_entries, num_rows=num_rows) base_write_time, base_read_time = self.full_benchmark( table_prefix, default_vis, None, num_entries, num_rows) with open(outfile, 'w') as f: bw = (base_write_time / n) * 1000 br = (base_read_time / n) * 1000 f.write(','.join(['name', 'signing time', 'verification time'])) f.write('\n') f.write(','.join(['baseline', str(bw), str(br)])) f.write('\n') for signClass in SUPPORTED_SIGNATURES: (st, vt) = self.full_benchmark(table_prefix, default_vis, signClass, num_entries, num_rows) # convert seconds for the whole batch to milliseconds # per element st = (st / n) * 1000 vt = (vt / n) * 1000 f.write(','.join([signClass.name, str(st), str(vt)])) f.write('\n') def fastfail_benchmark(self, table): """ Check how long it takes just to read each element from a table, to see if there's a difference because of the changed visibility fields in signed tables. """ table = sanitize(table) start = time.clock() total = 0 for e in self.conn.scan(table): total += 1 end = time.clock() return end - start def run_fastfail_benchmarks(self, table_prefix="fastfail_benchmarking", default_vis="default", num_rows=1000, num_noisy_entries=50000, num_noisy_rows=1000, outfile='fastfail_benchmark_out_2.csv', num_trials=100, one_vis=False): """ Benchmark to see how much overhead there is from the signature code making Accumulo unable to fast-fail and cache results from visibility field checks. If one_vis is False, it will randomly generate a default visibility value for each field. If it is a string, that string will be treated as the default visibility value for each 'noise' field. 
""" table_prefix = sanitize(table_prefix) seed = self.seed noisy_filename = 'noisy_' + self.filename if not seed: # set a new seed if one wasn't specified seed = str(time.time()) if one_vis: print 'generating noise with one visibility field' generate_data(noisy_filename, seed, vis=False, default_vis=one_vis, num_entries=num_noisy_entries, num_rows=num_rows) else: print 'generating noise with random visibility fields' generate_data(noisy_filename, seed, vis=True, num_entries=num_noisy_entries, num_rows=num_rows) noisy_table = 'noisy_' + table_prefix write_data(noisy_filename, self.conn, noisy_table) for sc in SUPPORTED_SIGNATURES: pubkey, privkey = sc.test_keys() signer = AccumuloSigner(privkey, sig_f=sc) write_and_sign_data(noisy_filename, self.conn, '_'.join([table_prefix, sanitize(sc.name)]), signer) all_times = [] for n in [(num_noisy_entries / 10000) * (10**i) for i in range(6)]: print 'n:', n generate_data(self.filename, str(time.time()), default_vis=default_vis, num_entries=n, num_rows=min(n, num_rows)) write_data(self.filename, self.conn, noisy_table) base_time = sum([ self.fastfail_benchmark(noisy_table) for j in range(num_trials) ]) times = [] for signClass in SUPPORTED_SIGNATURES: pubkey, privkey = signClass.test_keys() signer = AccumuloSigner(privkey, sig_f=signClass) table = '_'.join([table_prefix, sanitize(signClass.name)]) write_and_sign_data(self.filename, self.conn, table, signer) times.append((signClass.name, sum([ self.fastfail_benchmark(table) for j in range(num_trials) ]))) all_times.append((n, base_time, times)) with open(outfile, 'w') as f: for num_elems, base_time, trials in all_times: print 'Trial for %d elements. 
Base time: %s' % (num_elems, str(base_time)) f.write('%d,BASE,%s\n' % (num_elems, str(base_time))) for name, ttime in trials: print '\t%s: %s' % (name, str(ttime)) f.write('%d,%s,%s\n' % (num_elems, name, str(ttime))) print def id_test(self, table_prefix="id_test", default_vis="default", num_entries=10000, num_rows=1000): table_prefix = sanitize(table_prefix) generate_data(self.filename, self.seed, default_vis=default_vis, num_entries=num_entries, num_rows=num_rows) for signer_id, sigclass in self.signer_ids: _, privkey = sigclass.test_keys() table = table_prefix + '_' + sanitize(signer_id) signer = AccumuloSigner(privkey, sig_f=sigclass, signerID=signer_id) write_and_sign_data(self.filename, self.conn, table, signer) verify_data(self.conn, table, self.pki, sigclass) def table_test(self, table_prefix="table_test1", default_vis="default", num_entries=10000, num_rows=1000): table_prefix = sanitize(table_prefix) generate_data(self.filename, self.seed, default_vis=default_vis, num_entries=num_entries, num_rows=num_rows) for signer_id, sigclass in self.signer_ids: _, privkey = sigclass.test_keys() table = table_prefix + '_' + sanitize(signer_id) signer = AccumuloSigner(privkey, sig_f=sigclass) write_and_sign_data(self.filename, self.conn, table, signer, include_table=True) verif_key, _ = self.pki.get_verifying_key(signer_id) verify_data(self.conn, table, verif_key, False, include_table=True) def location_test(self, cfg_file, table_prefix="table_test1", default_vis="default", num_entries=10000, num_rows=1000): table_prefix = sanitize(table_prefix) + '_' + sanitize(loc) generate_data(self.filename, self.seed, default_vis=default_vis, num_entries=num_entries, num_rows=num_rows) for signer_id, sigclass in self.signer_ids: _, privkey = sigclass.test_keys() table = table_prefix + '_' + sanitize(signer_id) conf = new_config(cfg_file, self.conn) signer = AccumuloSigner(privkey, sig_f=sigclass, conf=conf) write_and_sign_data(self.filename, self.conn, table, signer) verif_key, _ 
= self.pki.get_verifying_key(signer_id) verify_data(self.conn, table, verif_key, False, conf=conf)
# NOTE(review): preserved byte-for-byte — the visible line breaks fall
# mid-statement (e.g. `benchmarker = ` / `Benchmarker(...)` across the last
# two lines), so the block is not reformatted here.
#
# main(): command-line driver for the encryption/CEABAC performance tests.
# It builds an OptionParser with four groups (Accumulo connection, output,
# test selection, entry/row counts), configures logging from --verbose and
# --log-file, connects to Accumulo, builds a DummyEncryptionPKI or
# DummyCachingEncryptionPKI (optionally keystore-backed via
# --use_accumulo_keystore), then dispatches the selected run_* suites.
#
# Issue spotted (TODO confirm): Benchmarker(...) is called here with
# logger/pki/conn keywords, which does not match the Benchmarker.__init__
# signature seen elsewhere in this file (host/port/user/password/...).
def main(): parser = OptionParser() parser.add_option("-v", '--verbose', dest="verbose", action="store_true", default=False, help="Verbose output") accumulo_group = OptionGroup( parser, 'Options that control the accumulo connection') accumulo_group.add_option('--host', dest='host', default='localhost', help='Host for Accumulo. Default: localhost') accumulo_group.add_option('--user', dest='user', default='root', help='User for Accumulo. Default: root') accumulo_group.add_option('--password', dest='password', default='secret', help='Password for Accumulo user. Default: ...') accumulo_group.add_option('--port', dest='port', type='int', default=42424, help="Port for Accumulo. Default: 42424") parser.add_option_group(accumulo_group) output_group = OptionGroup(parser, 'Options that control output') output_group.add_option('--log-file', dest='log_file', default='output.log', help='Output file for performance numbers') output_group.add_option('--table-prefix', dest='table_prefix', default='perf', help='Prefix used for data tables') output_group.add_option('--profile', dest='profile', action='store_true', default=False, help="Profiles encryption code") output_group.add_option( '--cache_key', dest='cache_key', action='store_true', default=False, help='Keys are now cached during encryption and decryption') output_group.add_option( '--use_accumulo_keystore', dest='accumulo_keystore', action='store_true', default=False, help= "Keys are stored in Accumulo if option is included, otherwise they are stored locally" ) parser.add_option_group(output_group) test_group = OptionGroup(parser, "Options that control what tests are being run") test_group.add_option('--all', dest='all', action='store_true', default=False, help='Runs all the different tests') test_group.add_option( '--non-ceabac', dest='non_ceabac', action='store_true', default=False, help='Runs the non-CEABAC tests with a simple schema') test_group.add_option('--ceabac', dest='ceabac', action='store_true', default=False, 
help='Runs the CEABAC tests with a simple schema') test_group.add_option( '--vis-ceabac', dest='vis_ceabac', action='store_true', default=False, help='Runs CEABAC in CBC mode with varying visibility fields') test_group.add_option('--diff_schemas_ceabac', dest='diff_ceabac', action='store_true', default=False, help='Runs several different schemas for VIS_CBC') test_group.add_option('--diff_schemas_non_ceabac', dest='diff_non_ceabac', action='store_true', default=False, help='Runs several different schemas for AES_CBC') test_group.add_option( '--mixed_schemas', dest='mixed_schemas', action='store_true', default=False, help='Runs a set of schemas where the schemes are both CEABAC and not') parser.add_option_group(test_group) entries_group = OptionGroup( parser, "Options that control how many entries are run") entries_group.add_option('--num_entries', dest='num_entries', type='int', default=1000, help='Total number of cells being run') entries_group.add_option('--num_rows', dest='num_rows', type='int', default=100, help='Total number of rows being run') parser.add_option_group(entries_group) (cl_flags, _) = parser.parse_args() #set up logging if cl_flags.verbose: log_level = logging.DEBUG else: log_level = logging.INFO logging.basicConfig(filename=cl_flags.log_file, level=log_level, format='%(levelname)s-%(asctime)s: %(message)s') logger = logging.getLogger("performance_testing") #check inputs if cl_flags.all and (cl_flags.non_ceabac or cl_flags.ceabac or cl_flags.vis_ceabac): logger.error( '--all is already specified, do not need to define other tests to run' ) #create accumulo connection conn = Accumulo(host=cl_flags.host, port=cl_flags.port, user=cl_flags.user, password=cl_flags.password) #create benchmarker if cl_flags.cache_key: logger.info('Using the caching version of the pki') pki = DummyCachingEncryptionPKI( conn=conn if cl_flags.accumulo_keystore else None) else: pki = DummyEncryptionPKI( conn=conn if cl_flags.accumulo_keystore else None) benchmarker = 
Benchmarker(logger=logger, pki=pki, conn=conn) if cl_flags.all: run_non_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) run_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) run_vis_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) run_diff_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) run_diff_non_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) run_mixed_schemas(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) if cl_flags.non_ceabac: run_non_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) if cl_flags.ceabac: run_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) if cl_flags.vis_ceabac: run_vis_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) if cl_flags.diff_ceabac: run_diff_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) if cl_flags.diff_non_ceabac: run_diff_non_ceabac(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags) if cl_flags.mixed_schemas: run_mixed_schemas(benchmarker, cl_flags.table_prefix, logger, cl_flags.profile, cl_flags)
# NOTE(review): preserved byte-for-byte — several of the visible line breaks
# fall inside string literals and comments, so the class is not reformatted
# here.
#
# EzRPStaticStore: chunked blob storage for static web content in Accumulo.
# Row = user-facing URL prefix; cf = "static"; cq "hash" holds the content
# hash, "nofchunks" the chunk count, and "chunk_0000000000".. the payload
# split into __chunk_size pieces (default 5 MiB, chosen to stay under the
# proxy's maxFrameSize — see the commented-out range-scan alternative in
# getFile() and its OutOfMemoryError note).
#
# Issues spotted (TODO confirm):
#   * _ensureTableExists() error path calls format(format(table=...)) —
#     the inner format() is the builtin, so the "{table}" placeholder in
#     that log message is never substituted.
#   * getAttributes() has a for/else where the else yields (None, None)
#     unconditionally after the loop (no break in the loop body); callers
#     always receive a trailing (None, None) pair.
#   * array.array('c') is Python 2 only.
class EzRPStaticStore(object): ''' Class to save and retrieve static content from Accumulo. cf = "static" For all rows cq = "hash" Stores the hash_value of Static File cq = "nofchunks" Stores the number of Chunks needed to store Static File cq = "chunk_000" .. "chunk_nnn" Stores the Chunks of Static File ''' def __init__(self, host="localhost", port=42424, user='******', password='******', chunk_size=int(5*1048576), logger=None): self.__host = host self.__port = port self.__user = user self.__password = password self.__table = 'ezfrontend' self.__cf = 'static' self.__connection = None if logger is not None: self.__log = logger else: self.__log = logging.getLogger(self.__module__ + '.' + self.__class__.__name__) self.__log.addHandler(logging.NullHandler()) self.__chunk_size =int(chunk_size) self._connect(self.__host, self.__port, self.__user, self.__password) def _connect(self, host, port, user, password): try: self.__connection = Accumulo(host, port, user, password) self.__log.debug('Connected to StaticFile Store') except Exception as e: self.__log.exception('Error while connecting to StaticFile Store: %s' % str(e)) raise Exception('Error while connecting to StaticFile Store: %s' % str(e)) def _ensureTableExists(self): ''' Make sure that the table exists before any other operation. Reconnect to Accumulo if the Connection is reset. ''' if not self.__connection.table_exists(self.__table): self.__log.info('table "{table}" does not exist in StaticFile Store. 
Creating the table'.format(table=self.__table)) self.__connection.create_table(self.__table) if not self.__connection.table_exists(self.__table): self.__log.error('Unable to ensure StaticFile Store table "{table} exists'.format(format(table=self.__table))) raise Exception('StaticFile Store: Unable to ensure table "{table}" exists'.format(table=self.__table)) def _ensureNoDuplicates(self, usrFacingUrlPrefix): ''' Ensure a single copy of file for a given usrFacingUrlPrefix ''' if self._getHash(usrFacingUrlPrefix) is not None: self.deleteFile(usrFacingUrlPrefix) def _putNofChunks(self, usrFacingUrlPrefix, length): ''' Put the number of chunks the static contents is stored ''' chunks = int(math.ceil(length / float(self.__chunk_size))) writer = self.__connection.create_batch_writer(self.__table) m = Mutation(usrFacingUrlPrefix) m.put(cf=self.__cf, cq="nofchunks", val=str(chunks)) writer.add_mutation(m) writer.close() def _getNofChunks(self, usrFacingUrlPrefix): ''' Get the number of chunks the static contents is stored ''' scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="nofchunks", erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="nofchunks") for entry in self.__connection.scan(self.__table, scanrange=scan_range): return int(entry.val) return 0 def _getChunks(self, data): ''' Break the blob into CHUNK_SIZE. 
less than maxFrameSize in Accumulo proxy.properties ''' data_length = len(data) for i in range(0, data_length + 1, self.__chunk_size): yield data[i:i + self.__chunk_size] def _putHash(self, usrFacingUrlPrefix, hash_str): ''' Puts the Hash for usrFacingUrlPrefix ''' writer = self.__connection.create_batch_writer(self.__table) m = Mutation(usrFacingUrlPrefix) m.put(cf=self.__cf, cq="hash", val=hash_str) writer.add_mutation(m) writer.close() def _getHash(self, usrFacingUrlPrefix): scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="hash", erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="hash") for entry in self.__connection.scan(self.__table, scanrange=scan_range): return str(entry.val) else: return None def reConnection(self): self._connect(self.__host, self.__port, self.__user, self.__password) def putFile(self, usrFacingUrlPrefix, hash_str, data): self._ensureTableExists() self._ensureNoDuplicates(usrFacingUrlPrefix) self._putHash(usrFacingUrlPrefix, hash_str) data_length = len(data) self._putNofChunks(usrFacingUrlPrefix, data_length) writer = self.__connection.create_batch_writer(self.__table) for i, chunk in enumerate(self._getChunks(data)): m = Mutation(usrFacingUrlPrefix) m.put(cf=self.__cf, cq="chunk_{number:010d}".format(number=i), val=chunk) writer.add_mutation(m) self.__log.debug('added static file for "{url}" with hash "{hash}" of length "{length}"'.format(url=usrFacingUrlPrefix, hash=hash_str, length=data_length)) writer.close() def getFile(self, usrFacingUrlPrefix): ''' Assembles all the chunks for this row ''' self._ensureTableExists() data = array.array('c') # Create a byte array chunks = self._getNofChunks(usrFacingUrlPrefix) chunks_read = 0 for i in range(chunks): cq = 'chunk_{number:010d}'.format(number=i) for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, cq]]): if entry.row == usrFacingUrlPrefix and entry.cq.startswith("chunk_"): chunks_read += 1 data.extend(entry.val) # This code gets following error while 
retrieving over 96MB. Data stops at first chunk_000 # # java.lang.OutOfMemoryError: Java heap space # -XX:OnOutOfMemoryError="kill -9 %p" # Executing /bin/sh -c "kill -9 32597"... # [1]+ Exit 137 sudo -u accumulo /opt/accumulo/current/bin/accumulo proxy -p /opt/accumulo/current/conf/proxy.properties # startChunk = "chunk_{number:010d}".format(number=0) # endChunk = "chunk_{number:010d}".format(number=chunks) # scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq=startChunk, # erow=usrFacingUrlPrefix, ecf=self.__cf, ecq=endChunk) # for entry in self.__connection.scan(self.__table, scanrange=scan_range): # #self.__log.info("getFile: row = {0} cq= {1}".format(entry.row, entry.cq)) # if entry.cq.startswith("chunk_"): # self.__log.info("getFile: row = {0} cq= {1}".format(entry.row, entry.cq)) # chunks_read += 1 # data.extend(entry.val) self.__log.debug('retrieved static file for {url}'.format(url=usrFacingUrlPrefix)) if chunks_read != chunks: self.__log.error("did not read all the chunks from StaticFile Store") return data.tostring() if data.buffer_info()[1] > 0 else None def deleteFile(self, usrFacingUrlPrefix): self._ensureTableExists() writer = self.__connection.create_batch_writer(self.__table) chunks = self._getNofChunks(usrFacingUrlPrefix) m = Mutation(usrFacingUrlPrefix) m.put(cf=self.__cf, cq="hash", is_delete=True) m.put(cf=self.__cf, cq="nofchunks", is_delete=True) for i in range(chunks): cq = 'chunk_{number:010d}'.format(number=i) m.put(cf=self.__cf, cq=cq, is_delete=True) writer.add_mutation(m) self.__log.debug('removed static file for {url}'.format(url=usrFacingUrlPrefix)) writer.close() def getAttributes(self): ''' Returns the urlprefix and the hash of all the entries in table as tuple ''' self._ensureTableExists() for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, "hash"]]): yield (entry.row, str(entry.val)) else: yield (None, None)
def connect(self):
    """Open the Accumulo session using the credentials stored on this object."""
    params = {
        'host': self._host,
        'port': self._port,
        'user': self._user,
        'password': self._password,
    }
    self._conn = Accumulo(**params)
class DBConnection:
    """ The interface to Accumulo. """

    __slots__ = ['_host', '_port', '_user', '_password', '_conn']

    def __init__(self, host, port, user, password):
        # Only stores the parameters; connect() opens the actual session.
        self._host = host
        self._port = port
        self._user = user
        self._password = password

    def connect(self):
        """Open the Accumulo connection with the stored credentials."""
        self._conn = Accumulo(host=self._host, port=self._port,
                              user=self._user, password=self._password)

    def query(self, plenario_session_state):
        """Count dat_master records grouped by (dataset_name, truncated date).

        Scans the full geohash range, groups cells into logical entries,
        filters by the session's date range and (nominally) the selected
        polygon, then truncates each obs_date per date_aggr.

        Returns a dict mapping (dataset_name, iso_date) -> count.
        Raises Exception for unsupported datasets or an invalid N-gon.
        """
        dataset = plenario_session_state.get_dataset()
        if dataset != "dat_master":
            raise Exception("Querying currently only supported on dat_master!")
        ngon = plenario_session_state.get_ngon()
        if ngon is None:
            raise Exception("You must have an N-gon selected!")
        if len(ngon) != 5:
            raise Exception("Querying currently only supported for a 5-gon!")
        start_date = plenario_session_state.get_start_date()
        end_date = plenario_session_state.get_end_date()
        date_aggr = plenario_session_state.get_date_aggr()

        # Corner points of the user's 5-gon (used by the commented-out
        # polygon below once spatial filtering is enabled for real).
        # FIX: removed the hard-coded debug coordinates that silently
        # overwrote these values right after assignment.
        p0 = ngon[0]
        p1 = ngon[1]
        p2 = ngon[2]
        p3 = ngon[3]
        p4 = ngon[4]

        # Scan the entire geohash keyspace; filtering happens client-side.
        min_gh = geohash.encode(-89.9, -179.9)
        max_gh = geohash.encode(89.9, 179.9)
        cells = []
        for cell in self._conn.batch_scan(
                "dat_master", numthreads=10,
                scanranges=[Range(srow=min_gh, erow=max_gh)]):
            cells.append(cell)

        # Group key-value pairs belonging to the same logical entry:
        # row = geohash, cq = entry id, cf = attribute name.
        # FIX: removed the duplicated `rows[cell.cq][cell.cf] = cell.val`.
        rows = {}
        for cell in cells:
            if cell.cq not in rows:
                rows[cell.cq] = {}
                rows[cell.cq]['ghash'] = cell.row
            rows[cell.cq][cell.cf] = cell.val

        # Filter out entries outside the temporal range.
        start_date = datetime.strptime(start_date, "%m/%d/%Y")
        end_date = datetime.strptime(end_date, "%m/%d/%Y")
        rows_filtered = {}
        for key, val in rows.iteritems():
            obs_date = datetime.strptime(val['obs_date'], "%Y-%m-%d %H:%M:%S")
            if start_date <= obs_date and obs_date <= end_date:
                rows_filtered[key] = val

        # Filter out entries outside the spatial range, i.e. the polygon.
        # NOTE(review): the polygon below covers the whole world, so this
        # filter currently passes everything -- the real 5-gon line is
        # commented out (debug leftover); confirm before enabling it.
        rows = rows_filtered
        rows_filtered = {}
        #poly = Polygon([p0,p1,p2,p3,p4])
        poly = Polygon([(-90, -180), (90, -180), (90, 180), (-90, 180)])
        for key, val in rows.iteritems():
            ghash = val['ghash']
            pt = Point(geohash.decode(ghash))
            if poly.contains(pt):
                rows_filtered[key] = val

        # Truncate dates as specified by date_aggr and count group sizes.
        rows = rows_filtered
        rows_filtered = {}
        for key, val in rows.iteritems():
            date = truncate(datetime.strptime(val['obs_date'], "%Y-%m-%d %H:%M:%S"), date_aggr)
            tup = (val['dataset_name'], date.isoformat())
            rows_filtered[tup] = rows_filtered.get(tup, 0) + 1
        return rows_filtered
class EzRPCertStore(object):
    """
    Wrapper class to underlying database store which holds server certs
    for reverse proxy.

    Certs are stored as password-protected PKCS#12 blobs, one row per
    server name, under column family "pfx" / qualifier "enc".
    """

    def __init__(self, host='localhost', port=42424, user='******', password='******', table='ezfrontend', privateKey=None, logger=None):
        self.__table = table
        self.__signer = None        # PKCS1 v1.5 signer, set when privateKey given
        self.__dbConnection = None
        self.__cf = "pfx"           # column family for the PKCS#12 payload
        self.__cq = "enc"           # column qualifier for the PKCS#12 payload
        if logger is not None:
            self.__logger = logger
        else:
            self.__logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__logger.addHandler(logging.NullHandler())
        if privateKey is not None:
            self.__updateSigner(privateKey)
        self.__connectToAccumulo(host, port, user, password)

    def __connectToAccumulo(self, host, port, user, password):
        # Raises EzRPCertStoreException on any connection failure.
        try:
            self.__dbConnection = Accumulo(host, port, user, password)
            self.__logger.debug('Successfully connected to CertStore')
        except Exception as ex:
            self.__logger.exception('Error in connecting to CertStore: %s' % str(ex))
            raise EzRPCertStoreException('Error in connecting to CertStore: %s' % str(ex))

    def __updateSigner(self, privateKey):
        # Load the RSA private key used to derive per-server pfx passwords.
        # FIX: renamed 'file' -> 'key_file' to stop shadowing the builtin.
        with open(privateKey) as key_file:
            self.__signer = PKCS1_v1_5.new(RSA.importKey(key_file.read()))
        self.__logger.info('Updated signer for CertStore')

    def __ensureTable(self):
        # Create the backing table on first use; fail loudly if it still
        # doesn't exist afterwards.
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.info('DB table %s doesn\'t exist in the Store. Creating ...' % self.__table)
            self.__dbConnection.create_table(self.__table)
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.error('Unable to ensure DB table exists in the Store.')
            raise EzRPCertStoreException('CertStore: Unable to ensure DB table exists in the Store.')

    def _generatePassword(self, serverName):
        # Derive a deterministic per-server password: either a signed
        # SHA256 digest (when a signer is configured) or a plain b64 blob.
        password = '******' #salt
        if self.__signer is None:
            password = base64.b64encode(password + serverName)
        else:
            digest = SHA256.new(password + serverName)
            signature = self.__signer.sign(digest)
            password = base64.b64encode(signature)
        return password

    def _generatePkcs12(self, serverName, certContents, keyContents, password=None):
        # Bundle PEM cert + key into an exported PKCS#12 blob.
        key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, keyContents)
        cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, certContents)
        pfx = OpenSSL.crypto.PKCS12()
        pfx.set_certificate(cert)
        pfx.set_privatekey(key)
        return pfx.export(passphrase=password)

    def _retrieveCertAndKey(self, pfx, serverName, password=None):
        # Inverse of _generatePkcs12: returns (certContents, keyContents) PEM.
        p12 = OpenSSL.crypto.load_pkcs12(pfx, password)
        keycontents = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey())
        certContents = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate())
        return certContents, keycontents

    def put(self, serverName, certContents, keyContents):
        # Insert (or overwrite) the cert/key bundle for serverName.
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        value = self._generatePkcs12(serverName, certContents, keyContents, self._generatePassword(serverName))
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, val=value)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('added cert/key contents for %s to store' % serverName)

    def get(self, serverName):
        # Returns (certContents, keyContents), or (None, None) if absent.
        self.__ensureTable()
        for entry in self.__dbConnection.scan(self.__table, cols=[[self.__cf, self.__cq]]):
            if entry.row == serverName:
                self.__logger.debug('retrieved cert/key for %s from store' % serverName)
                return self._retrieveCertAndKey(entry.val, serverName, self._generatePassword(serverName))
        return None, None

    def remove(self, serverName):
        # Delete the single cert/key cell for serverName.
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, is_delete=True)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('removed cert/key for %s from store' % serverName)

    def exists(self, serverName):
        self.__ensureTable()
        # use a single row range to narrow our scan
        # FIX: renamed 'range' -> 'scan_range' to stop shadowing the builtin.
        scan_range = Range(srow=serverName, scf=self.__cf, scq=self.__cq,
                           erow=serverName, ecf=self.__cf, ecq=self.__cq)
        for entry in self.__dbConnection.scan(self.__table, scanrange=scan_range):
            if entry.row == serverName:
                # FIX: original called self.__logger(...) directly, which
                # raises TypeError (Logger objects aren't callable).
                self.__logger.debug('cert/key for %s exists in store' % serverName)
                return True
        self.__logger.debug('cert/key for %s DOES NOT exist in store' % serverName)
        return False
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo from pyaccumulo.objects import Mutation from pyaccumulo.iterators import * import settings conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = "regexes" if conn.table_exists(table): conn.delete_table(table) conn.create_table(table) wr = conn.create_batch_writer(table) license_file = "LICENSE" linenum = 0 with open(license_file) as infile: for line in infile: linenum += 1
import numpy as np import geohash import datetime as dt import random import time from shapely.geometry import Polygon from shapely.geometry import Point # Import Accumulo from pyaccumulo import Accumulo, Mutation, Range select_data = pd.read_csv("/home/ubuntu/select_data.csv") # Connecting to Accumulo conn = Accumulo(host="172.31.3.218",port=42424,user="******",password="******") table = "Plenario_data" conn.create_table(table) # Writing Mutation wr = conn.create_batch_writer(table) for num in range(select_data.shape[0]): if (num%100000==0): print num m = Mutation(str(select_data.get_value(num,"Geohash"))) # A mutation is an object that represents a row in the Accumulo Table m.put(cf=str(select_data.get_value(num,"Formated_date")), val=select_data.get_value(num,"Descript")) # m.put(cf="cf2", val="%d"%num) # Adding the row to the table
from shapely.geometry import Point
from simulate_try import simulate_try


def geohash_min_max(polyon):
    """Return the (min, max) geohash over the four corners of the
    polygon's bounding box.

    FIX: the original encoded (x1, y2) twice and never encoded (x1, y1),
    so one corner of the bounding box was ignored in the min/max.
    """
    x1, y1, x2, y2 = polyon.bounds
    corners = (geohash.encode(x1, y1),
               geohash.encode(x1, y2),
               geohash.encode(x2, y1),
               geohash.encode(x2, y2))
    return (min(corners), max(corners))


# Import Accumulo
from pyaccumulo import Accumulo, Mutation, Range

# Connecting to Accumulo
conn = Accumulo(host="172.31.3.218", port=42424, user="******", password="******")

table = "Plenario_data"

# poly = Polygon([(37.795542, -122.423058),
#                 (37.800019, -122.398853),
#                 (37.789302, -122.38821),
#                 (37.7737, -122.39542),
#                 (37.770036, -122.417736)])

now_latlong_1 = "37.795542, -122.423058"
now_latlong_2 = "37.800019, -122.398853"
now_latlong_3 = "37.789302, -122.38821"
now_latlong_4 = "37.78121, -122.39212"
now_latlong_5 = "37.770036, -122.417736"

# Run the simulation 20 times, each with a freshly simulated polygon.
for i in range(0, 20):
    poly = Polygon(simulate_try())
""" Reads lines from a data file and inserts a number of records """ from pyaccumulo import Accumulo, Mutation, Range import settings import sys sys.path sys.path.append('/bdsetup') table = "well_logs" table1 = "drill_logs" conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) if conn.table_exists(table): conn.delete_table(table) conn.create_table(table) wr = conn.create_batch_writer(table) print "Ingesting some data ..." f = open("/bdsetup/acculog.txt", "rb") for i in range(250): line = f.readline().rstrip() label = '%04d' % i mut = Mutation('r_%s' % label) mut.put(cq='cq1', val=line) #mut.put(cf='cf_%s'%label, cq='cq1', val=line)
# (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pyaccumulo import Accumulo, Mutation, Range from pyaccumulo.iterators import * import settings conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) table = "analytics" if conn.table_exists(table): conn.delete_table(table) conn.create_table(table) summing = SummingCombiner(priority=10) summing.add_column("sum") summing.add_column("count") summing.attach(conn, table) sumarray = SummingArrayCombiner(priority=11) sumarray.add_column("histo") sumarray.attach(conn, table)