def test_column_ne():
    '''Checks the NOT_EQUAL comparison of ColumnMatchFilter.

    A filter on fam1:col1 compared against 'myvalue' is fed a single
    KeyValue, filterRow() is asserted, and the filter is reset:
      * fam1:col1 holding 'myvalue'      -> filterRow() is true
      * fam1:col1 holding 'secondvalue'  -> filterRow() is false
      * only fam1:col2 present           -> true when the filter was
        built with True as its last argument, false when built with False
    '''
    family = java.lang.String('fam1')
    qualifier = java.lang.String('col1')
    other_qualifier = java.lang.String('col2')
    full_column = java.lang.String.format("%s:%s", [family, qualifier])
    expected = java.lang.String('myvalue')
    different = java.lang.String('secondvalue')
    key = Bytes.toBytes(java.lang.String('row1'))

    not_equal = ColumnMatchFilter.CompareOp.NOT_EQUAL
    strict = ColumnMatchFilter(Bytes.toBytes(full_column), not_equal,
                               PBUtil.toBytes(expected), True)

    kv_same = KeyValue(key, Bytes.toBytes(family), Bytes.toBytes(qualifier),
                       PBUtil.toBytes(expected))
    kv_diff = KeyValue(key, Bytes.toBytes(family), Bytes.toBytes(qualifier),
                       PBUtil.toBytes(different))
    kv_absent = KeyValue(key, Bytes.toBytes(family), Bytes.toBytes(other_qualifier),
                         PBUtil.toBytes(expected))

    # Each case: feed one KeyValue, assert on filterRow(), then reset.
    cases = (
        (kv_same, assertTrue, "Row with matching value should be filtered"),
        (kv_diff, assertFalse, "Row with mismatched value should not be filtered"),
        (kv_absent, assertTrue, "Row missing column should be filtered"),
    )
    for kv, check, message in cases:
        strict.filterKeyValue(kv)
        check(strict.filterRow(), message)
        strict.reset()

    # test row missing column without 'filter if missing' flag
    lenient = ColumnMatchFilter(Bytes.toBytes(full_column), not_equal,
                                PBUtil.toBytes(expected), False)
    lenient.filterKeyValue(kv_absent)
    assertFalse(lenient.filterRow(), "Row missing column should not be filtered without flag")
    lenient.reset()
def update_row_proc (table,key_in,population_in):
    '''Writes a new population and today's date to the row key_in.

    Prints the function name, the key and the population, then issues a
    single Put that sets the "population" column (the integer formatted
    with "%d") and the "date_mod" column (the local date formatted as
    YYYY-MM-DD).  Both columns use an empty qualifier.
    '''
    print("update_row_proc")
    print(key_in)
    print(population_in)

    row_put = Put(Bytes.toBytes(key_in))

    population_text = "%d" % population_in
    row_put.add(Bytes.toBytes("population"), "", Bytes.toBytes(population_text))

    date_text = strftime("%Y-%m-%d", localtime())
    row_put.add(Bytes.toBytes("date_mod"), "", Bytes.toBytes(date_text))

    table.put(row_put)
def save(self, rowkey, vals, ts=None):
    '''Writes the given column values to one row with a single Put.

    rowkey -- row key; converted with java_str() and Bytes.toBytes().
    vals   -- mapping of column name to value.  Each name is split into
              family and qualifier by KeyValue.parseColumn().  Protocol
              buffer messages are stored via toByteArray(); every other
              value is encoded with PBUtil.toBytes().
    ts     -- optional timestamp set on the Put; ignored when None.
    '''
    put = Put(Bytes.toBytes(java_str(rowkey)))
    if ts is not None:
        put.setTimestamp(ts)

    for name, value in vals.items():
        family, qualifier = KeyValue.parseColumn(Bytes.toBytes(java_str(name)))
        if isinstance(value, com.google.protobuf.Message):
            payload = value.toByteArray()
        else:
            payload = PBUtil.toBytes(value)
        put.add(family, qualifier, payload)

    self.table.put(put)
def save(self, rowkey, vals, ts=None):
    '''Stores a set of column values in the row identified by rowkey.

    All values in vals (column name -> value) are added to one Put,
    which is then sent with self.table.put().  Column names are split
    by KeyValue.parseColumn(); protobuf messages are serialized with
    toByteArray() and anything else with PBUtil.toBytes().  When ts is
    not None it is set as the timestamp of the Put.
    '''
    row_bytes = Bytes.toBytes(java_str(rowkey))
    mutation = Put(row_bytes)
    if ts is not None:
        mutation.setTimestamp(ts)

    for column, value in vals.items():
        column_bytes = Bytes.toBytes(java_str(column))
        (fam, qual) = KeyValue.parseColumn(column_bytes)
        is_message = isinstance(value, com.google.protobuf.Message)
        encoded = value.toByteArray() if is_message else PBUtil.toBytes(value)
        mutation.add(fam, qual, encoded)

    self.table.put(mutation)
def scan_apply(self, cols, startrow=None, limit=50, filter=None, rowfunc=None):
    '''Scans the table and calls rowfunc on each row, up to limit rows.

    cols     -- iterable of columns; each is passed to Scan.addColumn().
    startrow -- row to start from; None means HConstants.EMPTY_START_ROW,
                anything else is converted with java_str()/Bytes.toBytes().
    limit    -- maximum number of rows handed to rowfunc; None disables
                the cap.
    filter   -- optional filter set on the Scan.
    rowfunc  -- callable invoked with each scanned row.  It defaults to
                None but is called unconditionally, so it must be supplied
                whenever the scan can return rows.

    Returns None.  The scanner is always closed; errors raised by close()
    itself are deliberately ignored (best-effort cleanup).
    '''
    if startrow is None:
        startrow = HConstants.EMPTY_START_ROW
    else:
        startrow = Bytes.toBytes(java_str(startrow))

    scan = Scan(startrow)
    if filter is not None:
        scan.setFilter(filter)
    for c in cols:
        scan.addColumn(c)

    cnt = 0
    scanner = None
    try:
        scanner = self.table.getScanner(scan)
        for rec in scanner:
            if limit is not None and cnt >= limit:
                break
            rowfunc(rec)
            cnt += 1
    finally:
        if scanner is not None:
            try:
                scanner.close()
            except:
                # Bare except kept on purpose: a failure while closing must
                # not hide an exception raised by the scan itself.
                pass
def eq(colname, colvalue):
    '''Builds a ColumnMatchFilter that uses CompareOp.EQUAL on one column.

    colname  -- column name; wrapped in java.lang.String and converted
                with Bytes.toBytes().
    colvalue -- value to compare against; protobuf messages are
                serialized with toByteArray(), anything else with
                PBUtil.toBytes().
    '''
    column = Bytes.toBytes(java.lang.String(colname))
    is_message = isinstance(colvalue, com.google.protobuf.Message)
    encoded = colvalue.toByteArray() if is_message else PBUtil.toBytes(colvalue)
    return ColumnMatchFilter(column, ColumnMatchFilter.CompareOp.EQUAL, encoded)
def eq(colname, colvalue):
    '''Returns an equality ColumnMatchFilter for colname and colvalue.

    The column name is converted to bytes through java.lang.String and
    Bytes.toBytes().  The value is serialized with toByteArray() when it
    is a protobuf Message and with PBUtil.toBytes() otherwise.
    '''
    name_bytes = Bytes.toBytes(java.lang.String(colname))

    def encode(value):
        # Messages carry their own serializer; everything else uses PBUtil.
        if isinstance(value, com.google.protobuf.Message):
            return value.toByteArray()
        return PBUtil.toBytes(value)

    return ColumnMatchFilter(name_bytes,
                             ColumnMatchFilter.CompareOp.EQUAL,
                             encode(colvalue))
def getUpdate(self, row):
    '''Builds the Put that applies self.replace to the given row.

    The Put targets row.getRow().  Every (column name -> value) entry of
    self.replace is added to it: the name is split into family and
    qualifier by KeyValue.parseColumn(), protobuf messages are
    serialized with toByteArray() and other values with PBUtil.toBytes().
    Returns the Put; nothing is written to the table here.
    '''
    put = Put(row.getRow())
    for name, value in self.replace.items():
        family, qualifier = KeyValue.parseColumn(Bytes.toBytes(java.lang.String(name)))
        is_message = isinstance(value, com.google.protobuf.Message)
        payload = value.toByteArray() if is_message else PBUtil.toBytes(value)
        put.add(family, qualifier, payload)
    return put
def getUpdate(self, row):
    '''Returns a Put for row carrying every replacement in self.replace.

    Keys of self.replace are column names (split by
    KeyValue.parseColumn()); values are serialized with toByteArray()
    for protobuf messages and with PBUtil.toBytes() otherwise.  The Put
    is only constructed, not sent.
    '''
    mutation = Put(row.getRow())
    for column, replacement in self.replace.items():
        column_bytes = Bytes.toBytes(java.lang.String(column))
        (fam, qual) = KeyValue.parseColumn(column_bytes)
        if isinstance(replacement, com.google.protobuf.Message):
            encoded = replacement.toByteArray()
        else:
            encoded = PBUtil.toBytes(replacement)
        mutation.add(fam, qual, encoded)
    return mutation
def test_column_ne():
    '''Checks column not equal operation in ColumnMatchFilter.

    Three KeyValues for row1 are run one at a time through a NOT_EQUAL
    filter on fam1:col1 = 'myvalue': a matching value (expected to be
    filtered), a different value (expected not to be filtered) and a
    row that only has fam1:col2 (filtered when the filter's last
    argument is True, not filtered when it is False).
    '''
    def run_case(filt, kv, assertion, message):
        # One KeyValue per case; the filter is reset afterwards.
        filt.filterKeyValue(kv)
        assertion(filt.filterRow(), message)
        filt.reset()

    fam = java.lang.String('fam1')
    col = java.lang.String('col1')
    col_other = java.lang.String('col2')
    fam_col = java.lang.String.format("%s:%s", [fam, col])
    value = java.lang.String('myvalue')
    value_other = java.lang.String('secondvalue')
    row_key = Bytes.toBytes(java.lang.String('row1'))

    filt_strict = ColumnMatchFilter(Bytes.toBytes(fam_col),
                                    ColumnMatchFilter.CompareOp.NOT_EQUAL,
                                    PBUtil.toBytes(value), True)

    matching = KeyValue(row_key, Bytes.toBytes(fam), Bytes.toBytes(col),
                        PBUtil.toBytes(value))
    mismatched = KeyValue(row_key, Bytes.toBytes(fam), Bytes.toBytes(col),
                          PBUtil.toBytes(value_other))
    missing = KeyValue(row_key, Bytes.toBytes(fam), Bytes.toBytes(col_other),
                       PBUtil.toBytes(value))

    run_case(filt_strict, matching, assertTrue,
             "Row with matching value should be filtered")
    run_case(filt_strict, mismatched, assertFalse,
             "Row with mismatched value should not be filtered")
    run_case(filt_strict, missing, assertTrue,
             "Row missing column should be filtered")

    # test row missing column without 'filter if missing' flag
    filt_lenient = ColumnMatchFilter(Bytes.toBytes(fam_col),
                                     ColumnMatchFilter.CompareOp.NOT_EQUAL,
                                     PBUtil.toBytes(value), False)
    run_case(filt_lenient, missing, assertFalse,
             "Row missing column should not be filtered without flag")
def get(self, rowkey):
    '''
    Fetches a single row and returns it converted by todict(): a
    dictionary keyed by full column name (column family:name) holding
    the values deserialized from protocol buffers.

    Returns None when the lookup yields no result or an empty one.
    '''
    row = self.table.get(Get(Bytes.toBytes(java_str(rowkey))))
    if row is None or row.isEmpty():
        return None
    return todict(row)
def get(self, rowkey):
    '''
    Looks up the row stored under rowkey.

    An existing, non-empty result is passed through todict(), giving a
    dictionary of full column names (column family:name) to values
    deserialized from protocol buffers.  Otherwise None is returned.
    '''
    key_bytes = Bytes.toBytes(java_str(rowkey))
    lookup = Get(key_bytes)
    found = self.table.get(lookup)

    has_data = found is not None and not found.isEmpty()
    if has_data:
        return todict(found)
    return None
def scan_apply(self, cols, startrow=None, limit=50, filter=None, rowfunc=None):
    '''Runs a scan over the table and applies rowfunc to each row.

    cols     -- iterable of columns added to the Scan with addColumn().
    startrow -- first row of the scan.  None selects
                HConstants.EMPTY_START_ROW; other values are converted
                with java_str() and Bytes.toBytes().
    limit    -- stop after this many rows have been passed to rowfunc;
                None means no cap.
    filter   -- optional filter applied with Scan.setFilter().
    rowfunc  -- callable receiving each scanned row.  Although the
                default is None, it is invoked unconditionally, so callers
                must pass one whenever rows may be returned.

    Returns None.  The scanner is closed in all cases, and any error
    raised while closing it is intentionally swallowed.
    '''
    if startrow is None:
        first_row = HConstants.EMPTY_START_ROW
    else:
        first_row = Bytes.toBytes(java_str(startrow))

    scan = Scan(first_row)
    if filter is not None:
        scan.setFilter(filter)
    for column in cols:
        scan.addColumn(column)

    handled = 0
    scanner = None
    try:
        scanner = self.table.getScanner(scan)
        for rec in scanner:
            if limit is not None and handled >= limit:
                break
            rowfunc(rec)
            handled += 1
    finally:
        if scanner is not None:
            try:
                scanner.close()
            except:
                # Best-effort close: kept as a bare except so that cleanup
                # problems never mask an error from the scan loop.
                pass
def getVersions(self, rowkey, limit=None, ts=None):
    '''
    Retrieves up to _limit_ versions of the specified row, at or before
    the specified timestamp (_ts_).  If _ts_ is None or not specified,
    no upper bound is placed on the timestamp.

    If _limit_ is None, setMaxVersions() is not called and the Get's own
    default number of versions applies.

    Returns a list of (timestamp, {column: value}) tuples, one per cell
    of the result, with values decoded by PBUtil.toValue().  The list is
    empty when the row does not exist.
    '''
    op = Get(Bytes.toBytes(java_str(rowkey)))
    if ts is not None:
        # Get.setTimeRange() takes a half-open range [min, max): the upper
        # bound is exclusive, so use ts + 1 to include cells stamped
        # exactly _ts_, as the contract above promises.
        op.setTimeRange(0, ts + 1)
    if limit is not None:
        op.setMaxVersions(limit)

    row = self.table.get(op)

    versions = []
    if row is not None and not row.isEmpty():
        for e in row.list():
            col = str(java.lang.String(e.getColumn()))
            colts = e.getTimestamp()
            val = PBUtil.toValue(e.getValue())
            versions.append((colts, {col: val}))

    return versions
def sync_region_info(self, id_region=None):
    '''Refreshes this object's region attributes from the META table.

    id_region -- row key of the region in the META table; defaults to
                 self.id_region when None.

    Reads the REGIONINFO cell of that row, decodes it with
    Writables.getHRegionInfo() and stores start_key, end_key, name,
    table_name, offline and obj (the decoded region info) on self.

    Raises PHBaseException when decoding the region info fails with a
    java.lang.NullPointerException.
    '''
    if id_region is None:
        id_region = self.id_region

    meta_table = HTable(self.conf, HConstants.META_TABLE_NAME)
    try:
        result = meta_table.get(Get(Bytes.toBytes(id_region)))
    finally:
        # The META handle is only needed for this single lookup.
        meta_table.close()

    info_bytes = result.getValue(HConstants.CATALOG_FAMILY,
                                 HConstants.REGIONINFO_QUALIFIER)
    try:
        hri = Writables.getHRegionInfo(info_bytes)
    except java.lang.NullPointerException:
        raise PHBaseException("could not retrieve region info for %s" % (id_region))

    self.start_key = jls(hri.getStartKey())
    self.end_key = jls(hri.getEndKey())
    self.name = jls(hri.getRegionName())
    # NOTE(review): despite its name this stores an HTable handle, not a
    # string; kept as-is because callers may rely on it -- confirm.
    self.table_name = HTable(hri.getTableDesc().getName())
    self.offline = hri.isOffline()
    self.obj = hri
def getVersions(self, rowkey, limit=None, ts=None):
    '''
    Retrieves up to _limit_ versions of the specified row, at or before
    the specified timestamp (_ts_).  If _ts_ is None or not specified,
    the Get is issued without any time range.

    When _limit_ is None, setMaxVersions() is skipped, so the Get's own
    default number of versions is used.

    Returns a list with one (timestamp, {column: value}) tuple per cell
    in the result (values decoded with PBUtil.toValue()), or an empty
    list if the row is missing or empty.
    '''
    op = Get(Bytes.toBytes(java_str(rowkey)))
    if ts is not None:
        # The upper bound of Get.setTimeRange() is exclusive; ts + 1 makes
        # cells written exactly at _ts_ part of the result ("at or before").
        op.setTimeRange(0, ts + 1)
    if limit is not None:
        op.setMaxVersions(limit)

    row = self.table.get(op)
    if row is None or row.isEmpty():
        return []

    versions = []
    for e in row.list():
        col = str(java.lang.String(e.getColumn()))
        colts = e.getTimestamp()
        val = PBUtil.toValue(e.getValue())
        versions.append((colts, {col: val}))
    return versions
def update(self, startrow, wherefilt, sets):
    '''Applies a column update to every row returned by a scan.

    startrow  -- where the scan starts.  None means
                 HConstants.EMPTY_START_ROW; a java.lang.String is
                 converted with Bytes.toBytes(); any other value is passed
                 to Scan unchanged.
    wherefilt -- optional filter restricting the scanned rows; None scans
                 without a filter.
    sets      -- replacement values handed to ColumnUpdate, whose
                 getUpdate() builds the mutation for each scanned row.

    Returns the number of rows for which an update was written.  The
    scanner is always closed; errors raised by close() are ignored.
    '''
    if startrow is None:
        startrow = HConstants.EMPTY_START_ROW
    elif isinstance(startrow, java.lang.String):
        startrow = Bytes.toBytes(startrow)

    updater = ColumnUpdate(sets)
    upcnt = 0

    scan = Scan(startrow)
    for c in self.families():
        scan.addColumn(c)
    if wherefilt is not None:
        # Use the caller's filter; the original passed the builtin `filter`
        # function here by mistake.
        scan.setFilter(wherefilt)

    scanner = None
    try:
        scanner = self.table.getScanner(scan)
        for rec in scanner:
            rowup = updater.getUpdate(rec)
            if rowup is not None:
                # Row updates are Put objects, which are written with put()
                # (commit() belongs to the older BatchUpdate API).
                self.table.put(rowup)
                upcnt += 1
    finally:
        if scanner is not None:
            try:
                scanner.close()
            except:
                # Best-effort close; must not mask an error from the loop.
                pass

    return upcnt
# Walk every row produced by `scanner` (created outside this fragment) and
# decode the region info stored in each one.  The loop body continues past
# the end of this fragment.
prev_region = curr_region = None
regions = []
start_keys = {}
end_keys = {}
while True:
    # NOTE(review): errors is reset at the top of every iteration, so within
    # the visible code it only ever counts the current row -- confirm intent.
    errors = 0
    result = scanner.next()
    if not result: # end of table
        break
    rowid = Bytes.toString(result.getRow())
    rowidStr = jls(rowid)
    # Serialized region info cell of the current row.
    bytes = result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)
    try:
        curr_region = Writables.getHRegionInfo(bytes)
    except java.lang.NullPointerException:
        # Report the row on stderr, forget the previous region and move on
        # to the next row.
        print >> sys.stderr, 'ERROR: %s error while reading region info' % \
            (jls(result.getRow()))
        errors += 1
        prev_region = None
        continue
    s_key = jls(curr_region.getStartKey())
def deleteAll(self, rows):
    '''Deletes the table row behind each record that carries a '__key__'.

    rows -- iterable of records supporting `in` and item access.
            Records without a '__key__' entry are skipped.  Each key is
            converted with java_str() and Bytes.toBytes() and deleted
            with its own Delete operation.
    '''
    for record in rows:
        if '__key__' not in record:
            continue
        key_bytes = Bytes.toBytes(java_str(record['__key__']))
        self.table.delete(Delete(key_bytes))
def delete(self, rowkey):
    '''Deletes the row stored under rowkey.

    The key is converted with java_str() and Bytes.toBytes() and wrapped
    in a Delete that is passed to self.table.delete().
    '''
    key_bytes = Bytes.toBytes(java_str(rowkey))
    self.table.delete(Delete(key_bytes))
from org.apache.hadoop.hbase import HBaseConfiguration
from org.apache.hadoop.hbase.client import HTable, Get
from org.apache.hadoop.hbase.util import Bytes

# Reads the cell cf:col1 of row 'row1' from the table "test" and prints it.
conf = HBaseConfiguration.create()
table = HTable(conf, "test")
try:
    get = Get(Bytes.toBytes('row1'))
    result = table.get(get)
    value = result.getValue(Bytes.toBytes('cf'), Bytes.toBytes('col1'))
finally:
    # Release the table handle whether or not the lookup succeeded.
    table.close()

if value is None:
    # getValue() found no such cell; fail with a clear message instead of
    # the TypeError that len(None) would raise.
    raise SystemExit("no value found for cf:col1 in row 'row1'")

print(Bytes.toString(value, 0, len(value)))
id_in = sys.argv[1] print ("%s" % id_in) # conf = HBaseConfiguration() table = HTable(conf, "cities") delete_row_proc (table,id_in) ss = Scan () ss.addColumn ("name","") ss.addColumn ("population","") ss.addColumn ("date_mod","") scanner = table.getScanner(ss) while 1: result = scanner.next() if not result: break key = java.lang.String(result.row) print key,"\t", vv = java.lang.String(result.getValue(Bytes.toBytes("name"),"")) print vv,"\t", pp = java.lang.String(result.getValue(Bytes.toBytes("population"),"")) print pp,"\t", dd = java.lang.String(result.getValue(Bytes.toBytes("date_mod"),"")) print dd # print ("*** 終了 ***") # ----------------------------------------------------------------
def delete_row_proc (table,key_in):
    '''Deletes the row key_in from table.

    Prints the function name and the key, then issues a Delete for the
    key converted with Bytes.toBytes().
    '''
    print("delete_row_proc")
    print(key_in)
    row_delete = Delete(Bytes.toBytes(key_in))
    table.delete(row_delete)
def delete(self, rowkey):
    '''Removes a single row from the table.

    rowkey -- key of the row; converted with java_str() and
              Bytes.toBytes() before being used in the Delete.
    '''
    removal = Delete(Bytes.toBytes(java_str(rowkey)))
    self.table.delete(removal)
def deleteAll(self, rows):
    '''Issues one Delete per record in rows that has a '__key__' entry.

    Records lacking '__key__' are ignored.  The key of every other
    record is converted with java_str() and Bytes.toBytes() and the
    matching row is deleted through self.table.delete().
    '''
    for entry in rows:
        if '__key__' in entry:
            row_key = java_str(entry['__key__'])
            removal = Delete(Bytes.toBytes(row_key))
            self.table.delete(removal)