def get_conn(self):
    """
    Returns a snakebite HDFSClient object.

    A single connection yields an ``AutoConfigClient`` when its extras set
    ``autoconfig`` to a truthy value, and a plain ``Client`` otherwise.
    Several connections yield an ``HAClient`` over all of them.

    :return: the client built for ``self.hdfs_conn_id``
    :raises HDFSHookException: if no connection is returned for
        ``self.hdfs_conn_id``
    """
    connections = self.get_connections(self.hdfs_conn_id)

    # Reject the empty case before touching connections[0]; otherwise an
    # empty result surfaces as IndexError instead of HDFSHookException.
    if not connections:
        raise HDFSHookException("conn_id doesn't exist in the repository")

    use_sasl = configuration.get('core', 'security') == 'kerberos'

    # When using HAClient, proxy_user must be the same, so is ok to always
    # take the first.
    first = connections[0]
    effective_user = self.proxy_user or first.login

    if len(connections) == 1:
        if first.extra_dejson.get('autoconfig', False):
            return AutoConfigClient(effective_user=effective_user,
                                    use_sasl=use_sasl)
        return Client(first.host, first.port,
                      effective_user=effective_user, use_sasl=use_sasl)

    nn = [Namenode(conn.host, conn.port) for conn in connections]
    return HAClient(nn, effective_user=effective_user, use_sasl=use_sasl)
def get_conn(self):
    """
    Returns a snakebite HDFSClient object.

    One connection yields a ``Client``; several yield an ``HAClient`` over
    all of them. SASL is currently never enabled because the security
    setting is a hard-coded placeholder (see the TODO below).

    :return: the client built for ``self.hdfs_conn_id``
    :raises HDFSHookException: if no connection is returned for
        ``self.hdfs_conn_id``
    """
    # TODO: read the security mode from a configuration file. Until then it
    # is fixed to None, so use_sasl always ends up False.
    security_config = None
    use_sasl = security_config == 'kerberos'

    connections = self.get_connections(self.hdfs_conn_id)

    # Reject the empty case before touching connections[0]; otherwise an
    # empty result surfaces as IndexError instead of HDFSHookException.
    if not connections:
        raise HDFSHookException("conn_id doesn't exist in the repository")

    # When using HAClient, proxy_user must be the same, so is ok to always
    # take the first.
    first = connections[0]
    effective_user = self.proxy_user or first.login

    if len(connections) == 1:
        return Client(first.host, first.port,
                      use_sasl=use_sasl, effective_user=effective_user)

    nn = [Namenode(conn.host, conn.port) for conn in connections]
    return HAClient(nn, use_sasl=use_sasl, effective_user=effective_user)
def __init__(self, filepath, hdfs_conn_id='hdfs_default', *args, **kwargs):
    """
    Look up the connection row for ``hdfs_conn_id`` and build an
    ``HAClient`` for its host and port.

    :param filepath: stored unchanged on the instance as ``self.filepath``
    :param hdfs_conn_id: ``conn_id`` value used to select the ``DB`` row
    :param args: positional arguments forwarded to the parent constructor
    :param kwargs: keyword arguments forwarded to the parent constructor
    :raises Exception: if no row matches ``hdfs_conn_id``
    """
    super(HdfsSensor, self).__init__(*args, **kwargs)
    self.filepath = filepath

    session = settings.Session()
    try:
        db = session.query(DB).filter(DB.conn_id == hdfs_conn_id).first()
        if not db:
            raise Exception("conn_id doesn't exist in the repository")
        # Copy the values off the row while the session is still open.
        self.host = db.host
        self.port = db.port
        session.commit()
    finally:
        # Always release the session, including when the lookup fails.
        session.close()

    namenodes = [Namenode(self.host, self.port)]
    self.sb = HAClient(namenodes)
def get_conn(self) -> Any:
    """
    Returns a snakebite HDFSClient object.

    The effective user and the autoconfig flag come from the hook itself
    when set, and otherwise from the first connection. If autoconfig is
    enabled an ``AutoConfigClient`` is returned; otherwise one connection
    yields a ``Client`` and several yield an ``HAClient``.

    :return: the client built for ``self.hdfs_conn_id``
    :raises AirflowException: re-raised from the connection lookup when
        autoconfig is not enabled on the hook
    :raises HDFSHookException: if there are no connections and autoconfig
        is not enabled
    """
    effective_user = self.proxy_user
    autoconfig = self.autoconfig
    use_sasl = conf.get('core', 'security') == 'kerberos'

    # Bound up front so the branches below never meet an unbound name when
    # the lookup fails but autoconfig lets us carry on.
    connections = []
    hdfs_namenode_principal = None

    try:
        connections = self.get_connections(self.hdfs_conn_id)

        # Only index into the list when it has entries; an empty result
        # must reach the HDFSHookException below, not raise IndexError.
        if connections:
            # When using HAClient, proxy_user must be the same, so is ok
            # to always take the first.
            first = connections[0]
            if not effective_user:
                effective_user = first.login
            if not autoconfig:
                autoconfig = first.extra_dejson.get('autoconfig', False)
            hdfs_namenode_principal = first.extra_dejson.get(
                'hdfs_namenode_principal')
    except AirflowException:
        if not autoconfig:
            raise

    if autoconfig:
        # will read config info from $HADOOP_HOME conf files
        client = AutoConfigClient(effective_user=effective_user,
                                  use_sasl=use_sasl)
    elif len(connections) == 1:
        client = Client(
            connections[0].host,
            connections[0].port,
            effective_user=effective_user,
            use_sasl=use_sasl,
            hdfs_namenode_principal=hdfs_namenode_principal,
        )
    elif len(connections) > 1:
        name_node = [
            Namenode(conn.host, conn.port) for conn in connections
        ]
        client = HAClient(
            name_node,
            effective_user=effective_user,
            use_sasl=use_sasl,
            hdfs_namenode_principal=hdfs_namenode_principal,
        )
    else:
        raise HDFSHookException("conn_id doesn't exist in the repository "
                                "and autoconfig is not specified")

    return client
def get_conn(self):
    """
    Build the snakebite client for ``self.hdfs_conn_id``.

    :return: a ``Client`` when exactly one connection is returned, or an
        ``HAClient`` over every namenode when there are several
    :raises HDFSHookException: when no connection is returned
    """
    conns = self.get_connections(self.hdfs_conn_id)
    count = len(conns)

    if count < 1:
        raise HDFSHookException("conn_id doesn't exist in the repository")

    if count == 1:
        only = conns[0]
        return Client(only.host, only.port)

    # More than one entry: pass every namenode to HAClient.
    namenodes = [Namenode(c.host, c.port) for c in conns]
    return HAClient(namenodes)
def get_conn(self):
    """
    Build the snakebite client for ``self.hdfs_conn_id``.

    SASL is requested when the ``core``/``security`` configuration value
    equals ``'kerberos'``.

    :return: a ``Client`` when exactly one connection is returned, or an
        ``HAClient`` over every namenode when there are several
    :raises HDFSHookException: when no connection is returned
    """
    # Read the security mode first, then fetch the connections.
    sasl_enabled = conf.get('core', 'security') == 'kerberos'
    conns = self.get_connections(self.hdfs_conn_id)
    count = len(conns)

    if count < 1:
        raise HDFSHookException("conn_id doesn't exist in the repository")

    if count == 1:
        only = conns[0]
        return Client(only.host, only.port, use_sasl=sasl_enabled)

    # More than one entry: pass every namenode to HAClient.
    namenodes = [Namenode(c.host, c.port) for c in conns]
    return HAClient(namenodes, use_sasl=sasl_enabled)