def insert(self, values):
    """
    Insert translates into an HBase put.
    :param values: K-V dict; field `rowkey` is required, while `timestamp`
                   is optional for setting the cells' timestamp.
    :return: Nil
    """
    if self.debug:
        log('[HB-FDW] Insert Begin: ================================')
        log(values)
    rowkey = values.get('rowkey')
    if not rowkey:
        raise ValueError('[HB-FDW] rowkey should be specified!')
    timestamp = values.get('timestamp')
    if timestamp and self.ts_converter:
        timestamp = self.ts_converter(timestamp)
    # rowkey and timestamp are meta columns; everything else becomes a cell.
    payload = {self.qualifier.get(col_name): str(value)
               for col_name, value in values.iteritems()
               if col_name != 'rowkey' and col_name != 'timestamp'}
    self.table.put(rowkey, payload, timestamp=timestamp)
def handle_error(self, response):
    # Prefer the human-readable description when the server provides one;
    # .get avoids a KeyError when either field is absent.
    error = response.get('error_description') or response.get('error')
    log(message=error, level=logging.ERROR)
def __init__(self, options, columns):
    super(DnbForeignDataWrapper, self).__init__(options, columns)
    self.columns = columns
    try:
        self.x_dnb_user = options['x_dnb_user']
        self.x_dnb_pwd = options['x_dnb_pwd']
        self.type = options['type']
    except KeyError:
        log(message='You must pass x_dnb_user, x_dnb_pwd and type options',
            level=ERROR)
def delete(self, document_id: Any) -> None:
    log("DELETE %s" % repr(document_id), logging.DEBUG)
    row = self.__find_row_by_id(document_id)
    if row is None:
        return None
    self.sheet.delete_row(row)
def delete(self, rowkey):
    """
    Delete will translate into del.
    Notice: select will be invoked first to locate rowkeys.
    :param rowkey: rowkey to be deleted
    :return: Nil
    """
    if not rowkey:
        raise ValueError('[HB-FDW] rowkey should be specified!')
    if self.debug:
        log("[HB-FDW] Delete Begin: %s ================================" % rowkey)
    self.table.delete(rowkey)
def get_token(self):
    headers = {'x-dnb-user': self.x_dnb_user, 'x-dnb-pwd': self.x_dnb_pwd}
    request = post('https://maxcvservices.dnb.com/Authentication/V2.0',
                   headers=headers)
    response = request.json()
    if 'error' in response:
        log(message='error happened processing auth', level=ERROR)
    try:
        return response['AuthenticationDetail']['Token']
    except KeyError:
        log(message='Could not find AuthenticationDetail or Token in response',
            level=ERROR)
def insert(self, new_values: Dict[str, Any]) -> Dict[str, Any]:
    log("INSERT %s" % repr(new_values), logging.DEBUG)
    new_values_converted = self.__convert_pg_row(new_values)
    # Order the values to match the sheet's column order.
    new_values_to_be_inserted = [new_values_converted.get(c) for c in self.columns]
    self.sheet.append_row(values=new_values_to_be_inserted,
                          value_input_option=self.value_input_option)
    return new_values
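# `insert` and `update` both call a `__convert_pg_row` helper that is not
# shown here. A minimal sketch, assuming it simply runs every value through
# an OID-keyed converter table (`PG_TO_GS_CONVERTERS` is a hypothetical name;
# see `__convert_value` below for the lookup logic):
def __convert_pg_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
    # Convert each PostgreSQL value into something the sheet can store.
    return {name: self.__convert_value(name, value, PG_TO_GS_CONVERTERS)
            for name, value in row.items()}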
def execute(self, quals, columns):
    # Fetch the total user count first, then page through the users in batches.
    userCount = int(self.getUserCount())
    for batchStart in range(0, userCount, self.batchSize):
        log(message=batchStart, level=logging.WARNING)
        usersData = self.getUsers(batchStart)
        if 'error' in usersData:
            self.handle_error(usersData)
        else:
            for entry in usersData:
                entry['organization_id'] = self.organization_id
                yield entry
def execute(self, quals: List, columns: List) -> Generator:
    log("EXECUTE %s %s" % (repr(quals), repr(columns)), logging.DEBUG)
    results = self.sheet.get_all_values('UNFORMATTED_VALUE')
    # First row is the header; convert the remaining rows to PG dicts.
    headers = results[0]
    results = map(lambda row: self.__convert_gs_row(row, headers), results[1:])
    for result in results:
        line = {}
        for column in columns:
            line[column] = result.get(column)
        yield line
def getUserCount(self):
    token = self.getToken()
    headers = {
        'Authorization': 'Bearer %s' % token,
        'Content-Type': 'application/json'
    }
    getUserCountURL = '%s/auth/admin/realms/%s/users/count' % (self.url, self.realm)
    userCountResponse = get(getUserCountURL, headers=headers)
    log(message=userCountResponse.text, level=logging.WARNING)
    return userCountResponse.text
def getUsers(self, batchStart):
    token = self.getToken()
    headers = {
        'Authorization': 'Bearer %s' % token,
        'Content-Type': 'application/json'
    }
    getUsersURL = '%s/auth/admin/realms/%s/users?first=%s&max=%s' % (
        self.url, self.realm, batchStart, self.batchSize)
    usersResponse = get(getUsersURL, headers=headers)
    log(message=getUsersURL, level=logging.WARNING)
    usersData = usersResponse.json()
    return usersData
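# `getUserCount` and `getUsers` rely on a `getToken` helper that is not shown
# here. A plausible sketch, assuming the standard Keycloak OpenID Connect
# password-grant token endpoint, the options checked by `validate` below
# (url, realm, client_id, client_secret, grant_type, username, password),
# and that `post` is imported from requests alongside `get`:
def getToken(self):
    tokenURL = '%s/auth/realms/%s/protocol/openid-connect/token' % (
        self.url, self.realm)
    payload = {
        'grant_type': self.grant_type,  # typically 'password'
        'client_id': self.client_id,
        'client_secret': self.client_secret,
        'username': self.username,
        'password': self.password,
    }
    # Token requests are form-encoded, unlike the JSON admin API calls above.
    tokenResponse = post(tokenURL, data=payload)
    tokenData = tokenResponse.json()
    if 'error' in tokenData:
        self.handle_error(tokenData)
    return tokenData.get('access_token')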
def details(self, quals, columns):
    token = self.get_token()
    # Initialize duns so the check below cannot hit an unbound variable.
    duns = None
    for q in quals:
        if q.field_name == 'duns':
            duns = q.value
    if duns is None:
        log(message='You must pass a duns number', level=ERROR)
        return
    uri = 'https://maxcvservices.dnb.com/V5.0/organizations/' + duns + '/products/DCP_STD'
    headers = {'Authorization': token}
    request = get(uri, headers=headers)
    response = request.json()
    if 'error' in response:
        log(message='error happened processing query', level=ERROR)
    else:
        row = OrderedDict()
        row['duns'] = duns
        row['details'] = dumps(response['OrderProductResponse']
                               ['OrderProductResponseDetail']
                               ['Product']['Organization'])
        yield row
def update(self, rowkey, newvalues):
    """
    Update invokes a select operation first to locate the rowkey.
    If a different rowkey is given in `newvalues`, update acts like
    a copy rather than a move.
    :param rowkey: rowkey to be updated
    :param newvalues: K-V dict of new values
    :return: Nil
    """
    if not rowkey:
        raise ValueError('[HB-FDW] rowkey should be specified!')
    if self.debug:
        log('[HB-FDW] Update Begin: %s ================================' % rowkey)
        log(newvalues)
    payload = {self.qualifier.get(col_name): str(value)
               for col_name, value in newvalues.iteritems()
               if col_name != 'rowkey' and col_name != 'timestamp'}
    # A new rowkey given in the update statement makes a new copy under that rowkey
    rowkey = newvalues.get('rowkey') or rowkey
    self.table.put(rowkey, payload)
def query(self, quals, columns):
    token = self.get_token()
    uri = 'https://maxcvservices.dnb.com/V5.0/organizations?match=true&MatchTypeText=Basic&CountryISOAlpha2Code=US'
    for q in quals:
        uri += '&' + q.field_name + '=' + q.value
    headers = {'Authorization': token}
    request = get(uri, headers=headers)
    response = request.json()
    if 'error' in response:
        log(message='error happened processing query', level=ERROR)
    else:
        for candidate in response['MatchResponse']['MatchResponseDetail']['MatchCandidate']:
            row = OrderedDict()
            for col in columns:
                for q in quals:
                    if col == q.field_name:
                        row[col] = q.value
            row['candidate'] = dumps(candidate)
            yield row
def __convert_value(self, name: str, value: Any,
                    converters: Dict[int, callable]) -> Any:
    column_definition = self.columns.get(name)
    if column_definition is None:
        return None
    converter = converters.get(column_definition.type_oid)
    if converter is None:
        raise Exception("Unsupported data type %s (%s)" %
                        (column_definition.type_oid, column_definition.type_name))
    try:
        return converter(value)
    except ValueError:
        log("Invalid value %s for column %s (%s)" %
            (value, name, column_definition.type_name), logging.WARNING)
        return None
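# The `converters` argument maps PostgreSQL type OIDs to parser callables.
# A minimal sketch of such a table, using the standard OIDs of the built-in
# types; the exact set this wrapper supports is not shown here, and the
# table name is hypothetical:
GS_TO_PG_CONVERTERS = {
    16: lambda v: str(v).lower() in ('true', 't', '1'),  # bool
    20: int,     # int8
    23: int,     # int4
    25: str,     # text
    701: float,  # float8
}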
def update(self, document_id: Any,
           new_values: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    log("UPDATE %s %s" % (repr(document_id), repr(new_values)), logging.DEBUG)
    new_values_converted = self.__convert_pg_row(new_values)
    row = self.__find_row_by_id(document_id)
    if row is None:
        return None
    # Skip the rowid and formula columns; write everything else back as cells.
    cells = [
        Cell(row=row, col=self.__find_column_by_name(key),
             value=val if val is not None else '')
        for (key, val) in new_values_converted.items()
        if key != self.rowid_column and key not in self.formula_columns
    ]
    self.sheet.update_cells(cell_list=cells,
                            value_input_option=self.value_input_option)
    return new_values
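# `delete` and `update` locate sheet coordinates via `__find_row_by_id` and
# `__find_column_by_name`, which are not shown here. A plausible sketch,
# assuming the id is compared as a string and sheet row 1 holds the header:
def __find_column_by_name(self, name: str) -> int:
    # gspread Cell coordinates are 1-based.
    return list(self.columns).index(name) + 1

def __find_row_by_id(self, document_id: Any) -> Optional[int]:
    id_col = self.__find_column_by_name(self.rowid_column)
    # Skip the header row; data starts at sheet row 2.
    for row, value in enumerate(self.sheet.col_values(id_col)[1:], start=2):
        if value == str(document_id):
            return row
    return None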
def handle_error(self, response):
    error = response['error']['message']
    log(message=error, level=logging.ERROR)
def validate(self, options, columns):
    required = ('organization_id', 'url', 'username', 'password',
                'realm', 'client_id', 'grant_type', 'client_secret')
    for option in required:
        if option not in options:
            log(message='No %s provided' % option, level=logging.ERROR)
def execute(self, quals, columns, sortkeys=None):
    """
    Query HBase.
    :param quals: list of qualifications
    :param columns: set of required columns
    :param sortkeys: keys to be sorted on
    :return: data dict generator
    """
    if self.debug:
        log("[HB-FDW] Query Begin ================================")
        log("[HB-FDW] Columns : %s" % columns)
        log("[HB-FDW] Quals   : %s" % quals)

    # Quals about rowkey: rowkey may end up a str, list, or dict (range)
    rowkey = None
    # Quals about timestamp
    ts = None
    # Other filters
    filter_str = None

    for qual in quals:
        # Parse rowkey quals and row filter
        if qual.field_name == 'rowkey':
            # Single rowkey
            if qual.operator == '=':
                rowkey = qual.value
                if len(columns) == 1:  # rowkey only
                    yield {"rowkey": rowkey}
                    return
            # Multiple rowkeys
            elif qual.is_list_operator:
                rowkey = qual.value
                if len(columns) == 1:
                    for rk in rowkey:
                        yield {"rowkey": rk}
                    return
            # Range upper bound
            elif qual.operator == '<=':
                if isinstance(rowkey, dict):
                    rowkey['until'] = qual.value.encode('utf-8')
                else:
                    rowkey = {'until': qual.value.encode('utf-8')}
            # Range lower bound
            elif qual.operator == '>=':
                if isinstance(rowkey, dict):
                    rowkey['since'] = qual.value
                else:
                    rowkey = {'since': qual.value}
            # Regex match
            elif qual.operator == '~':
                filter_str = "RowFilter(%s, 'regexstring:%s')" % ('=', qual.value)
            elif qual.operator == '!~':
                filter_str = "RowFilter(%s, 'regexstring:%s')" % ('!=', qual.value)
            else:
                log(qual)
                raise ValueError("[HB-FDW] Supported operators on rowkey: =, <=, >=, in, any, between")
        # Parse timestamp quals
        elif qual.field_name == 'timestamp':
            # Many timestamp-related features are not supported by happybase.
            # Exact fetch: ts is a bigint
            if qual.operator == '=':
                ts = self.convert_timestamp(qual.value)
            # Range upper bound: ts is a dict with fields `since` and `until`
            elif qual.operator == '<=' or qual.operator == '<':
                if isinstance(ts, dict):
                    ts['until'] = self.convert_timestamp(qual.value)
                else:
                    ts = {'until': self.convert_timestamp(qual.value)}
            # Range lower bound
            elif qual.operator == '>=' or qual.operator == '>':
                if isinstance(ts, dict):
                    ts['since'] = self.convert_timestamp(qual.value)
                else:
                    ts = {'since': self.convert_timestamp(qual.value)}
        # Todo: pushing down conditions on normal columns seems useless; maybe later

    # happybase special treatment: only < & <= are allowed for timestamp
    if isinstance(ts, dict) and 'until' in ts:
        ts = ts['until']

    # Translate postgres column names into hbase column names
    qualifiers = [self.qualifier[k] for k in columns
                  if k != 'rowkey' and k != 'timestamp']

    # No quals about rowkey: full table scan
    if not rowkey:
        for rk, response in self.table.scan(columns=qualifiers,
                                            filter=filter_str,
                                            include_timestamp=self.include_timestamp,
                                            timestamp=ts):
            yield self.wrap(rk, response)
    # Equality on rowkey: single get
    elif isinstance(rowkey, basestring):
        yield self.wrap(rowkey,
                        self.table.row(rowkey, qualifiers,
                                       include_timestamp=self.include_timestamp,
                                       timestamp=ts))
    # IN clause: multiple rowkeys, multiple gets
    elif isinstance(rowkey, list):
        for rk, response in self.table.rows(rowkey, qualifiers,
                                            include_timestamp=self.include_timestamp,
                                            timestamp=ts):
            yield self.wrap(rk, response)
    # Range clause (< <= > >= between and): scan with rowkey and timestamp range
    elif isinstance(rowkey, dict):
        if self.debug:
            log('[HB-FDW] %s' % rowkey)
            log('[HB-FDW] %s' % qualifiers)
            log('[HB-FDW] %s' % filter_str)
            log('[HB-FDW] %s' % ts)
        for rk, response in self.table.scan(rowkey.get('since'), rowkey.get('until'),
                                            columns=qualifiers, filter=filter_str,
                                            include_timestamp=self.include_timestamp,
                                            timestamp=ts):
            yield self.wrap(rk, response)
    else:
        raise ValueError('[HB-FDW] Invalid rowkey')
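# `execute` funnels every HBase response through a `wrap` helper that is not
# shown here. A plausible sketch: invert the pg-column -> hbase-qualifier map
# and rebuild a PostgreSQL row dict (with include_timestamp, happybase yields
# (value, timestamp) tuples):
def wrap(self, rowkey, response):
    inverse = dict((q, col) for col, q in self.qualifier.iteritems())
    row = {'rowkey': rowkey}
    for qualifier, value in response.iteritems():
        if self.include_timestamp:
            value, ts = value
            row['timestamp'] = self.ts_reconverter(ts) if self.ts_reconverter else ts
        row[inverse.get(qualifier)] = value
    return row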
def __init__(self, fdw_options, fdw_columns):
    """
    Setup foreign table options and column definition
    :param fdw_options: Specified via foreign table DDL options
    :param fdw_columns: Specified via foreign table DDL
    """
    super(HappyBaseFdw, self).__init__(fdw_options, fdw_columns)
    self.fdw_columns = fdw_columns
    self.fdw_options = fdw_options

    # Options:
    # host & port: Thrift server (Optional, with defaults)
    self.host = fdw_options.get('host', 'localhost')
    self.port = int(fdw_options.get('port', '9090'))
    # table: HBase table name (Required)
    self.table_name = fdw_options.get('table')
    # family: Column family (Optional)
    self.family = fdw_options.get('family')
    # debug: Print debug messages (Optional)
    self.debug = fdw_options.get('debug', None) == 'True'

    if not self.table_name or not self.host:
        raise ValueError('[HB-FDW] Host and table should be specified!')

    self.qualifier = {}
    self.serializer = {}
    self.include_timestamp = False
    self.ts_converter = None
    self.ts_reconverter = None

    for col_name, col_def in fdw_columns.iteritems():
        if col_name == 'rowkey':
            continue
        if col_name == 'timestamp':
            self.include_timestamp = True
            self.ts_converter = TS_CONVERTER.get(col_def.type_oid)
            self.ts_reconverter = TS_RECONVERTER.get(col_def.type_oid)
            continue
        qualifier = col_def.options.get('qualifier')
        # Column family already specified
        if self.family:
            if qualifier:
                qualifier = self.family + ':' + qualifier
            else:
                qualifier = self.family + ':' + col_name
        else:
            if not qualifier:
                qualifier = col_name.replace('_', ':', 1)
        self.qualifier[col_name] = qualifier
        serializer = col_def.options.get('serializer')
        self.serializer[col_name] = serializer

    if self.debug:
        log("[HB-FDW] FDW Column Define ========================")
        for col_name, cd in fdw_columns.iteritems():
            log("[HB-FDW] %-12s\t[%d :%-s(%s:%s)] Opt:%s" %
                (cd.column_name, cd.type_oid, cd.type_name,
                 cd.base_type_name, cd.typmod, cd.options))
        log("[HB-FDW] FDW Options ===============================")
        for k, v in fdw_options.iteritems():
            log("[HB-FDW] %s:%s" % (k, v))
        log("[HB-FDW] Column Alias ==============================")
        for k, v in self.qualifier.iteritems():
            log("[HB-FDW] %12s\t%s" % (k, v))

    self.conn = happybase.Connection(self.host, self.port)
    self.table = self.conn.table(self.table_name)
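# The TS_CONVERTER / TS_RECONVERTER tables looked up above are not shown here.
# A minimal sketch, assuming HBase timestamps are epoch milliseconds and the
# standard PostgreSQL type OIDs (20 = int8, 1114 = timestamp without time zone):
import calendar
from datetime import datetime

TS_CONVERTER = {
    20: int,                                                  # pass epoch millis through
    1114: lambda dt: calendar.timegm(dt.timetuple()) * 1000,  # datetime -> epoch millis
}
TS_RECONVERTER = {
    20: int,
    1114: lambda ms: datetime.utcfromtimestamp(ms / 1000.0),  # epoch millis -> datetime
}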
def validate(self, options, columns):
    if 'domain' not in options:
        log(message='No domain provided', level=logging.ERROR)
def validate(self, options, columns):
    if 'uri' not in options:
        log(message='No uri given', level=logging.ERROR)
def validate(self, options, columns):
    if 'key' not in options:
        log(message='No api key given', level=logging.ERROR)
    if 'table_name' not in options:
        log(message='No corresponding FQL table', level=logging.ERROR)