import sys

sys.path.append('/bdsetup')

table = "well_logs"
table1 = "drill_logs"

# Connect using credentials from the project settings module.
conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)

# Recreate the table so each ingest run starts from an empty state.
if conn.table_exists(table):
    conn.delete_table(table)
conn.create_table(table)

wr = conn.create_batch_writer(table)

print("Ingesting some data ...")
# 'with' guarantees the log file is closed even if ingest fails
# (the original left the handle open and never closed it).
with open("/bdsetup/acculog.txt", "rb") as f:
    # Ingest the first 250 lines, one Accumulo row per line
    # (row ids r_0000 .. r_0249, single cell under cq1).
    for i in range(250):
        line = f.readline().rstrip()
        label = '%04d' % i
        mut = Mutation('r_%s' % label)
        mut.put(cq='cq1', val=line)
        wr.add_mutation(mut)
wr.close()

# NOTE(review): the source chunk ends mid-statement here; the body of this
# 'if' is not visible in this view. 'pass' is a placeholder only.
if conn.table_exists(table1):
    pass  # TODO: continuation not visible in this chunk — restore original body
class EzRPCertStore(object):
    """
    Wrapper class to underlying database store which holds server certs
    for the reverse proxy.

    Each server's cert/key pair is stored as a password-protected PKCS#12
    blob in one Accumulo row keyed by server name, under column family
    "pfx" and qualifier "enc". The export password is derived from the
    server name (optionally signed with a configured RSA private key).
    """

    def __init__(self, host='localhost', port=42424, user='******', password='******',
                 table='ezfrontend', privateKey=None, logger=None):
        self.__table = table
        self.__signer = None        # PKCS1 v1.5 signer; None => unsigned passwords
        self.__dbConnection = None
        self.__cf = "pfx"           # column family for all cert rows
        self.__cq = "enc"           # column qualifier holding the PKCS#12 blob
        if logger is not None:
            self.__logger = logger
        else:
            self.__logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__logger.addHandler(logging.NullHandler())
        if privateKey is not None:
            self.__updateSigner(privateKey)
        self.__connectToAccumulo(host, port, user, password)

    def __connectToAccumulo(self, host, port, user, password):
        """Open the Accumulo connection; raise EzRPCertStoreException on failure."""
        try:
            self.__dbConnection = Accumulo(host, port, user, password)
            self.__logger.debug('Successfully connected to CertStore')
        except Exception as ex:
            self.__logger.exception('Error in connecting to CertStore: %s' % str(ex))
            raise EzRPCertStoreException('Error in connecting to CertStore: %s' % str(ex))

    def __updateSigner(self, privateKey):
        """Load the RSA private key used to derive per-server passwords."""
        # renamed local from 'file' to avoid shadowing the builtin
        with open(privateKey) as keyFile:
            self.__signer = PKCS1_v1_5.new(RSA.importKey(keyFile.read()))
        self.__logger.info('Updated signer for CertStore')

    def __ensureTable(self):
        """Create the backing table if missing; raise if it still doesn't exist."""
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.info('DB table %s doesn\'t exist in the Store. Creating ...' % self.__table)
            self.__dbConnection.create_table(self.__table)
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.error('Unable to ensure DB table exists in the Store.')
            raise EzRPCertStoreException('CertStore: Unable to ensure DB table exists in the Store.')

    def _generatePassword(self, serverName):
        """
        Derive the PKCS#12 export password for serverName.

        Without a signer the password is just base64(salt + serverName);
        with a signer it is base64 of an RSA signature over SHA256(salt + serverName).
        """
        password = '******'  #salt
        if self.__signer is None:
            password = base64.b64encode(password + serverName)
        else:
            digest = SHA256.new(password + serverName)
            signature = self.__signer.sign(digest)
            password = base64.b64encode(signature)
        return password

    def _generatePkcs12(self, serverName, certContents, keyContents, password=None):
        """Bundle a PEM cert and key into an exported PKCS#12 blob."""
        key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, keyContents)
        cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, certContents)
        pfx = OpenSSL.crypto.PKCS12()
        pfx.set_certificate(cert)
        pfx.set_privatekey(key)
        return pfx.export(passphrase=password)

    def _retrieveCertAndKey(self, pfx, serverName, password=None):
        """Unpack a PKCS#12 blob back into (PEM cert, PEM key) contents."""
        p12 = OpenSSL.crypto.load_pkcs12(pfx, password)
        keycontents = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey())
        certContents = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate())
        return certContents, keycontents

    def put(self, serverName, certContents, keyContents):
        """Store (or overwrite) the cert/key pair for serverName."""
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        value = self._generatePkcs12(serverName, certContents, keyContents,
                                     self._generatePassword(serverName))
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, val=value)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('added cert/key contents for %s to store' % serverName)

    def get(self, serverName):
        """Return (certContents, keyContents) for serverName, or (None, None)."""
        self.__ensureTable()
        for entry in self.__dbConnection.scan(self.__table, cols=[[self.__cf, self.__cq]]):
            if entry.row == serverName:
                self.__logger.debug('retrieved cert/key for %s from store' % serverName)
                return self._retrieveCertAndKey(entry.val, serverName,
                                                self._generatePassword(serverName))
        return None, None

    def remove(self, serverName):
        """Delete the stored cert/key cell for serverName."""
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, is_delete=True)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('removed cert/key for %s from store' % serverName)

    def exists(self, serverName):
        """Return True if a cert/key entry exists for serverName."""
        self.__ensureTable()
        # use a single row range to narrow our scan
        scan_range = Range(srow=serverName, scf=self.__cf, scq=self.__cq,
                           erow=serverName, ecf=self.__cf, ecq=self.__cq)
        for entry in self.__dbConnection.scan(self.__table, scanrange=scan_range):
            if entry.row == serverName:
                # BUG FIX: the Logger object was being called directly
                # (Logger instances are not callable) — use .debug().
                self.__logger.debug('cert/key for %s exists in store' % serverName)
                return True
        self.__logger.debug('cert/key for %s DOES NOT exist in store' % serverName)
        return False
from shapely.geometry import Polygon
from shapely.geometry import Point
# Import Accumulo
from pyaccumulo import Accumulo, Mutation, Range

select_data = pd.read_csv("/home/ubuntu/select_data.csv")

# Connecting to Accumulo
conn = Accumulo(host="172.31.3.218", port=42424, user="******", password="******")

table = "Plenario_data"
# create_table() raises if the table already exists, so guard first
# (matches the existence-check pattern used elsewhere in this file).
if not conn.table_exists(table):
    conn.create_table(table)

# Writing Mutation
wr = conn.create_batch_writer(table)
for num in range(select_data.shape[0]):
    if num % 100000 == 0:
        print(num)  # progress marker; valid under both Python 2 and 3
    # A mutation is an object that represents a row in the Accumulo table,
    # keyed by the row's geohash.
    m = Mutation(str(select_data.get_value(num, "Geohash")))
    # NOTE(review): DataFrame.get_value is deprecated in modern pandas;
    # .at[num, col] is the replacement if this is ever upgraded.
    m.put(cf=str(select_data.get_value(num, "Formated_date")),
          val=select_data.get_value(num, "Descript"))
    # Adding the row to the table
    wr.add_mutation(m)
wr.close()
class EzRPStaticStore(object):
    '''
    Class to save and retrieve static content from Accumulo.
    cf = "static"                    For all rows
    cq = "hash"                      Stores the hash_value of Static File
    cq = "nofchunks"                 Stores the number of Chunks needed to store Static File
    cq = "chunk_000" .. "chunk_nnn"  Stores the Chunks of Static File
    '''

    def __init__(self, host="localhost", port=42424, user='******', password='******',
                 chunk_size=int(5*1048576), logger=None):
        self.__host = host
        self.__port = port
        self.__user = user
        self.__password = password
        self.__table = 'ezfrontend'   # shared front-end table
        self.__cf = 'static'          # single column family for all static rows
        self.__connection = None
        if logger is not None:
            self.__log = logger
        else:
            self.__log = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__log.addHandler(logging.NullHandler())
        self.__chunk_size = int(chunk_size)
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def _connect(self, host, port, user, password):
        """Open the Accumulo connection; raise on failure."""
        try:
            self.__connection = Accumulo(host, port, user, password)
            self.__log.debug('Connected to StaticFile Store')
        except Exception as e:
            self.__log.exception('Error while connecting to StaticFile Store: %s' % str(e))
            raise Exception('Error while connecting to StaticFile Store: %s' % str(e))

    def _ensureTableExists(self):
        '''
        Make sure that the table exists before any other operation.
        Reconnect to Accumulo if the Connection is reset.
        '''
        if not self.__connection.table_exists(self.__table):
            self.__log.info('table "{table}" does not exist in StaticFile Store. '
                            'Creating the table'.format(table=self.__table))
            self.__connection.create_table(self.__table)
            if not self.__connection.table_exists(self.__table):
                # BUG FIX: original called the builtin format(format(table=...))
                # instead of str.format, which raised TypeError; it also left
                # the "{table}" placeholder unterminated.
                self.__log.error('Unable to ensure StaticFile Store table "{table}" exists'.format(table=self.__table))
                raise Exception('StaticFile Store: Unable to ensure table "{table}" exists'.format(table=self.__table))

    def _ensureNoDuplicates(self, usrFacingUrlPrefix):
        '''
        Ensure a single copy of file for a given usrFacingUrlPrefix
        '''
        if self._getHash(usrFacingUrlPrefix) is not None:
            self.deleteFile(usrFacingUrlPrefix)

    def _putNofChunks(self, usrFacingUrlPrefix, length):
        '''
        Put the number of chunks the static contents is stored
        '''
        chunks = int(math.ceil(length / float(self.__chunk_size)))
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="nofchunks", val=str(chunks))
        writer.add_mutation(m)
        writer.close()

    def _getNofChunks(self, usrFacingUrlPrefix):
        '''
        Get the number of chunks the static contents is stored
        '''
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="nofchunks",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="nofchunks")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return int(entry.val)
        return 0

    def _getChunks(self, data):
        '''
        Break the blob into chunk_size pieces (must stay below maxFrameSize
        in the Accumulo proxy.properties).

        BUG FIX: the original iterated range(0, len(data) + 1, chunk_size),
        which yielded a spurious empty trailing chunk whenever the length was
        an exact multiple of chunk_size (and one empty chunk for empty data).
        That extra row disagreed with _putNofChunks' ceil() count, so
        deleteFile never removed it.
        '''
        data_length = len(data)
        for offset in range(0, data_length, self.__chunk_size):
            yield data[offset:offset + self.__chunk_size]

    def _putHash(self, usrFacingUrlPrefix, hash_str):
        '''
        Puts the Hash for usrFacingUrlPrefix
        '''
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", val=hash_str)
        writer.add_mutation(m)
        writer.close()

    def _getHash(self, usrFacingUrlPrefix):
        """Return the stored hash string for the prefix, or None if absent."""
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="hash",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="hash")
        # Return the first (only) matching cell; the original used a
        # confusing for/else for the same effect.
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return str(entry.val)
        return None

    def reConnection(self):
        """Re-establish the Accumulo connection with the saved credentials."""
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def putFile(self, usrFacingUrlPrefix, hash_str, data):
        """Store data for the prefix as hash + chunk-count + chunk rows."""
        self._ensureTableExists()
        self._ensureNoDuplicates(usrFacingUrlPrefix)
        self._putHash(usrFacingUrlPrefix, hash_str)
        data_length = len(data)
        self._putNofChunks(usrFacingUrlPrefix, data_length)
        writer = self.__connection.create_batch_writer(self.__table)
        for i, chunk in enumerate(self._getChunks(data)):
            m = Mutation(usrFacingUrlPrefix)
            m.put(cf=self.__cf, cq="chunk_{number:010d}".format(number=i), val=chunk)
            writer.add_mutation(m)
        self.__log.debug('added static file for "{url}" with hash "{hash}" of length "{length}"'.format(url=usrFacingUrlPrefix, hash=hash_str, length=data_length))
        writer.close()

    def getFile(self, usrFacingUrlPrefix):
        '''
        Assembles all the chunks for this row
        '''
        self._ensureTableExists()
        data = array.array('c')  # Create a byte array
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        chunks_read = 0
        # NOTE(review): chunks are fetched with one scan per chunk on purpose.
        # A single Range scan over all chunk_* cells caused the Accumulo proxy
        # to die with java.lang.OutOfMemoryError for files over ~96MB.
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, cq]]):
                if entry.row == usrFacingUrlPrefix and entry.cq.startswith("chunk_"):
                    chunks_read += 1
                    data.extend(entry.val)
        self.__log.debug('retrieved static file for {url}'.format(url=usrFacingUrlPrefix))
        if chunks_read != chunks:
            self.__log.error("did not read all the chunks from StaticFile Store")
        return data.tostring() if data.buffer_info()[1] > 0 else None

    def deleteFile(self, usrFacingUrlPrefix):
        """Delete the hash, chunk-count, and every chunk cell for the prefix."""
        self._ensureTableExists()
        writer = self.__connection.create_batch_writer(self.__table)
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", is_delete=True)
        m.put(cf=self.__cf, cq="nofchunks", is_delete=True)
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            m.put(cf=self.__cf, cq=cq, is_delete=True)
        writer.add_mutation(m)
        self.__log.debug('removed static file for {url}'.format(url=usrFacingUrlPrefix))
        writer.close()

    def getAttributes(self):
        '''
        Returns the urlprefix and the hash of all the entries in table as tuple.
        Yields a single (None, None) sentinel when the store is empty.
        '''
        self._ensureTableExists()
        # BUG FIX: the original used for/else, whose else-branch runs whenever
        # the loop finishes without break — so it appended a spurious
        # (None, None) AFTER yielding every real entry. The sentinel is now
        # emitted only when no entries exist.
        found = False
        for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, "hash"]]):
            found = True
            yield (entry.row, str(entry.val))
        if not found:
            yield (None, None)