def create_table(conn, name, start, stop):
    """Recreate table ``name`` and fill it with one test cell per index.

    Any existing table called ``name`` is deleted first. For every ``i`` in
    ``range(start, stop)`` one mutation is written whose row, column family
    and column qualifier are ``row``/``f``/``q`` followed by the zero-padded
    offset ``i - start``. The cell value depends on ``name``:

    * ``'uints'`` / ``'ints'``: ``i`` zero-padded to six digits
    * ``'floats'``: ``i + 0.5`` rendered with the ``07f`` format
    * ``'strings'``: ``'xxx'`` followed by ``i``
    * ``'missing_data'``: ``'NA'`` for even ``i``, ``'nan'`` for the
      remaining multiples of three, otherwise ``i`` zero-padded to six digits

    Args:
        conn: connection object providing ``table_exists``, ``delete_table``,
            ``create_table`` and ``create_batch_writer``.
        name: table to (re)create; also selects the value format above.
        start: first index (inclusive).
        stop: last index (exclusive).

    Raises:
        ValueError: if ``name`` is not one of the names listed above and the
            range is non-empty. The table has already been recreated by then.
    """
    if conn.table_exists(name):
        conn.delete_table(name)

    logging.debug('Creating table "{}"...'.format(name))
    conn.create_table(name)
    writer = conn.create_batch_writer(name)

    logging.debug('Writing mutations...')
    # try/finally so the batch writer is released even when a mutation
    # fails or the table name turns out to be unsupported.
    try:
        for i in range(start, stop):
            if name in ('uints', 'ints'):
                value = '{0:06d}'.format(i)
            elif name == 'floats':
                # NOTE(review): the default 6-digit precision already yields
                # at least 8 characters, so the width of 7 never pads --
                # confirm this is the intended format.
                value = '{0:07f}'.format(i + 0.5)
            elif name == 'strings':
                value = 'xxx' + str(i)
            elif name == 'missing_data':
                if i % 2 == 0:
                    value = 'NA'
                elif i % 3 == 0:
                    value = 'nan'
                else:
                    value = '{0:06d}'.format(i)
            else:
                raise ValueError('invalid table name')

            # Row/family/qualifier are numbered from zero regardless of start.
            offset = i - start
            m = pyaccumulo.Mutation('row{0:06d}'.format(offset).encode('utf-8'))
            m.put(cf='f{0:06d}'.format(offset).encode('utf-8'),
                  cq='q{0:06d}'.format(offset).encode('utf-8'),
                  val=value.encode('utf-8'))
            #logging.debug('Adding Mutation(row={}, updates={})...'.format(m.row, [u.value for u in m.updates]))
            writer.add_mutation(m)
    finally:
        writer.close()
def _write_file(self, uuid, md5, size, path, full_path):
    """Validate one file on disk and queue its mutation, keyed by ``md5``.

    The file at ``full_path`` is read in binary mode and then checked:
    ``uuid`` and ``md5`` go through ``util.check_uuid`` / ``util.check_md5``,
    the byte count must equal ``size``, and the content is passed to
    ``util.check_md5_hash`` together with ``md5`` (the ``util`` helpers
    presumably raise on invalid input -- confirm against ``util``).

    ``path`` must start with ``uuid + '/'``; paths starting with
    ``'latest/'`` or ``'content'`` get that prefix prepended instead.

    Every call records the project and path cells. Size, content and the
    ``twosix``/``tags`` cell are only added the first time ``md5`` is seen,
    after which ``md5`` is added to ``self.hashes``. The mutation is handed
    to ``self.wr``.

    Raises:
        ValueError: if the byte count differs from ``size`` or ``path`` has
            none of the accepted prefixes.
    """
    with open(full_path, 'rb') as handle:
        content = handle.read()

    util.check_uuid(uuid)
    util.check_md5(md5)

    actual_size = len(content)
    if size != actual_size:
        raise ValueError('size of file {}: {} != {}'.format(
            path, actual_size, size))
    util.check_md5_hash(content, md5)

    # Normalise the path so it is always rooted at the project uuid.
    uuid_prefix = uuid + '/'
    if not path.startswith(uuid_prefix):
        if not path.startswith(('latest/', 'content')):
            raise ValueError('path does not start with uuid or svn/git')
        path = uuid_prefix + path

    mutation = pyaccumulo.Mutation(md5)
    mutation.put(cf='file|project', cq=uuid)
    mutation.put(cf='file|path', cq=path)

    # Content-level cells are written once per distinct hash.
    if md5 not in self.hashes:
        mutation.put(cf='file|size', cq='size', val=str(size))
        mutation.put(cf='file|content', cq='content', val=content)
        # Tags cell without a value (annotated "false" by the original
        # author; meaning unclear -- TODO confirm).
        mutation.put(cf='twosix', cq='tags')
        self.hashes.add(md5)
        #m.put(cf='file|count', cq='count', value=TODO)
        #m.put(cf='twosix|tag', cq='recursion', val='0.443')

    self.wr.add_mutation(mutation)
def mutation_from_kv_tuple(tup):
    """Build a pyaccumulo mutation from a flat five-field entry tuple.

    Args:
        tup: ``(row, cv, cf, cq, val)`` -- row id, column visibility,
            column family, column qualifier and value, in that order.

    Returns:
        A ``pyaccumulo.Mutation`` for ``row`` holding that single update.

    Raises:
        ValueError: if ``tup`` does not unpack into exactly five items.
    """
    row, cv, cf, cq, val = tup
    # A single parenthesised argument keeps this valid (and the output
    # identical) on both Python 2 and Python 3.
    print('Inserting entry: \n Row - %s,\n Column_Visibility - %s,\n Column_Family - %s,\n Column_Qualifier - %s,\n Value - %s' % (row, cv, cf, cq, val))
    m = pyaccumulo.Mutation(row)
    m.put(cf=cf, cq=cq, cv=cv, val=val)
    return m
def mutation_from_kv_tuple(tup, vis=''):
    """Build a pyaccumulo mutation from a ``(row, value)`` tuple.

    The value is stored under an empty column family and an empty column
    qualifier.

    Args:
        tup: ``(row, val)`` pair.
        vis: column visibility passed to the update; defaults to ``''``.

    Returns:
        A ``pyaccumulo.Mutation`` for ``row`` holding that single update.

    Raises:
        ValueError: if ``tup`` does not unpack into exactly two items.
    """
    row, val = tup
    # A single parenthesised argument keeps this valid (and the output
    # identical) on both Python 2 and Python 3.
    print('Inserting signed row: %s, value: %s' % (row, val))
    m = pyaccumulo.Mutation(row)
    m.put(cf='', cq='', cv=vis, val=val)
    return m
def insert_data_accumulo(self, csvData, fields, name, host, port, user, password):
    """Write CSV rows into the existing Accumulo table ``name``.

    Each row becomes one mutation whose row id is its 1-based position in
    ``csvData``. For every field, one cell is written with the 1-based
    field position as column family, the field name as column qualifier
    and ``str(row[i])`` as the value.

    Args:
        csvData: iterable of rows; each row is indexed by field position.
        fields: sequence of field names, in column order.
        name: target table; must already exist.
        host, port, user, password: forwarded to
            ``self.get_connection_accumulo``.

    Returns:
        ``Util.error_msg(...)`` if the table does not exist, otherwise
        ``Util.success_msg()``.

    Raises:
        IndexError: if a row has fewer entries than ``fields``.
    """
    conn = self.get_connection_accumulo(host, port, user, password)
    if not conn.table_exists(name):
        return Util.error_msg("Accumulo table " + name + " does not exist")

    wr = conn.create_batch_writer(name)
    # try/finally so the writer is released even if a row is malformed.
    try:
        for rowNum, row in enumerate(csvData, 1):
            m = pyaccumulo.Mutation(str(rowNum))
            for i, field in enumerate(fields):
                # Pass the cell content by keyword: the third positional
                # parameter of Mutation.put is the column visibility, not
                # the value.
                m.put(cf=str(i + 1), cq=field, val=str(row[i]))
            wr.add_mutation(m)
    finally:
        wr.close()
    return Util.success_msg()
def add_project(self, uuid):
    """Record project ``uuid`` in the project table and in ``self.projects``.

    ``uuid`` is first passed to ``util.check_uuid`` (presumably raising on
    an invalid id -- confirm against ``util``). A mutation for row ``uuid``
    with an empty column family and qualifier is then written to
    ``self.project_table`` through ``self.conn``, and ``uuid`` is added to
    ``self.projects``.
    """
    util.check_uuid(uuid)

    project_row = pyaccumulo.Mutation(uuid)
    project_row.put(cf='', cq='')
    self.conn.write(self.project_table, project_row)

    self.projects.add(uuid)