def get_bite(self):
    """
    Return the snakebite client, reconnecting first when necessary.

    If Luigi has forked, we have a different PID, and need to reconnect.
    """
    config = hdfs_config.hdfs()
    if self.pid != os.getpid() or not self._bite:
        # Drop unset options so snakebite falls back to its own defaults.
        candidates = {
            'hadoop_version': config.client_version,
            'effective_user': config.effective_user,
        }
        client_kwargs = {}
        for key, value in six.iteritems(candidates):
            if value is not None and value != '':
                client_kwargs[key] = value
        if config.snakebite_autoconfig:
            # AutoConfigClient reads ``${HADOOP_PATH}/conf/hdfs-site.xml``
            # to locate the namenode. It is fully backwards compatible
            # with the vanilla Client and can be used for a non-HA
            # cluster as well; the behaviour is the same as Client.
            from snakebite.client import AutoConfigClient
            self._bite = AutoConfigClient(**client_kwargs)
        else:
            from snakebite.client import Client
            self._bite = Client(config.namenode_host,
                                config.namenode_port,
                                **client_kwargs)
    return self._bite
def run(self):
    """
    Walk every path in ``self.path_list`` on HDFS and print (to stdout)
    each file whose replication factor is 1; when
    ``self.replication_factor`` is set, raise those files to it.

    Progress dots and diagnostics go to stderr so stdout stays a clean
    list of affected file paths.
    """
    log.info('initiating snakebite hdfs client')
    try:
        client = AutoConfigClient()
    except krbV.Krb5Error as _:  # pylint: disable=no-member
        if self.verbose:
            print('', file=sys.stderr)
            print(_, file=sys.stderr)
        # BUG FIX: the original swallowed this error and fell through to
        # a NameError on the unbound 'client'; re-raise so the real
        # Kerberos failure is reported instead.
        raise
    start_time = time.time()
    dir_count = 0
    file_count = 0
    repl1_count = 0
    for path in self.path_list:
        try:
            result_list = client.ls([path], recurse=True,
                                    include_toplevel=True,
                                    include_children=True)
            for result in result_list:
                if self.verbose and (dir_count + file_count) % 100 == 0:
                    print('.', file=sys.stderr, end='')
                # Directories report a block replication of 0.
                if result['block_replication'] == 0:
                    dir_count += 1
                    continue
                file_count += 1
                if result['block_replication'] == 1:
                    file_path = result['path']
                    repl1_count += 1
                    if self.verbose:
                        print('', file=sys.stderr)
                    print(file_path)
                    if self.replication_factor:
                        log.info('setting replication factor to %s on %s',
                                 self.replication_factor, file_path)
                        # setrep() returns a generator, so it must be
                        # evaluated in order to actually execute —
                        # otherwise there is no effect on the
                        # replication factor.
                        for _ in client.setrep([file_path],
                                               self.replication_factor,
                                               recurse=False):
                            if 'result' not in _:
                                print(
                                    'WARNING: result field not found in setrep result: {}'
                                    .format(_), file=sys.stderr)
                                continue
                            if not _['result']:
                                # BUG FIX: route this warning to stderr
                                # like every other diagnostic (it
                                # previously polluted the stdout file
                                # list).
                                print(
                                    'WARNING: failed to setrep: {}'.format(_),
                                    file=sys.stderr)
        except (snakebite.errors.FileNotFoundException,
                snakebite.errors.RequestError) as _:
            if self.verbose:
                print('', file=sys.stderr)
                print(_, file=sys.stderr)
    if self.verbose:
        print('', file=sys.stderr)
    secs = int(time.time() - start_time)
    print('\nCompleted in {} secs\n'.format(secs), file=sys.stderr)
    print('{} files with replication factor 1 out of {} files in {} dirs'
          .format(repl1_count, file_count, dir_count), file=sys.stderr)
def get_conn(self):
    '''
    Returns a snakebite HDFSClient object.

    One registered connection yields a plain ``Client`` (or an
    ``AutoConfigClient`` when its extra carries ``autoconfig``); several
    connections yield an ``HAClient``.

    :raises HDFSHookException: when no connection is configured for
        ``self.hdfs_conn_id``.
    '''
    connections = self.get_connections(self.hdfs_conn_id)
    # BUG FIX: check for an empty connection list *before* indexing
    # connections[0]; the original raised IndexError on the
    # effective_user line instead of the documented exception.
    if not connections:
        raise HDFSHookException("conn_id doesn't exist in the repository")
    use_sasl = configuration.get('core', 'security') == 'kerberos'
    # When using HAClient, proxy_user must be the same, so it is ok to
    # always take the first connection's login.
    effective_user = self.proxy_user or connections[0].login
    if len(connections) == 1:
        autoconfig = connections[0].extra_dejson.get('autoconfig', False)
        if autoconfig:
            client = AutoConfigClient(effective_user=effective_user,
                                      use_sasl=use_sasl)
        else:
            client = Client(connections[0].host, connections[0].port,
                            effective_user=effective_user,
                            use_sasl=use_sasl)
    else:
        nn = [Namenode(conn.host, conn.port) for conn in connections]
        client = HAClient(nn, effective_user=effective_user,
                          use_sasl=use_sasl)
    return client
def get_bite(self):
    """
    Return the snakebite client, reconnecting first when necessary.

    If Luigi has forked, we have a different PID, and need to reconnect.
    """
    if self.pid != os.getpid() or not self._bite:
        # Drop unset options so snakebite falls back to its own defaults.
        # BUG FIX: the original used a tuple-unpacking lambda
        # ``lambda (k, v):`` and ``.iteritems()`` — both Python-2-only
        # (PEP 3113 removed the former); this comprehension is valid on
        # Python 2 and 3 and behaves identically.
        client_kwargs = {
            k: v
            for k, v in {
                'hadoop_version': self.config.getint("hdfs", "client_version", None),
                'effective_user': self.config.get("hdfs", "effective_user", None),
            }.items()
            if v is not None and v != ''
        }
        if self.config.getboolean("hdfs", "snakebite_autoconfig", False):
            # AutoConfigClient reads ``${HADOOP_PATH}/conf/hdfs-site.xml``
            # to locate the namenode. It is fully backwards compatible
            # with the vanilla Client and can be used for a non-HA
            # cluster as well; the behaviour is the same as Client.
            from snakebite.client import AutoConfigClient
            self._bite = AutoConfigClient(**client_kwargs)
        else:
            from snakebite.client import Client
            self._bite = Client(
                self.config.get("hdfs", "namenode_host"),
                self.config.getint("hdfs", "namenode_port"),
                **client_kwargs)
    return self._bite
def build_hdfs_client():
    """Create the module-level snakebite client and warm it up."""
    # pylint: disable=global-statement
    global _hdfs_client
    _hdfs_client = AutoConfigClient()
    # Issue a cheap RPC to warm up the connection to the namenode;
    # otherwise the first real call may take 3+ minutes.
    _hdfs_client.df()
def get_bite(self):
    """
    Return the snakebite client, reconnecting first when necessary.

    If Luigi has forked, we have a different PID, and need to reconnect.
    """
    if self.pid != os.getpid() or not self._bite:
        autoconfig_enabled = self.config.getboolean(
            "hdfs", "snakebite_autoconfig", False)
        if autoconfig_enabled is True:
            # AutoConfigClient reads ``${HADOOP_PATH}/conf/hdfs-site.xml``
            # to locate the namenode. It is fully backwards compatible
            # with the vanilla Client and can be used for a non-HA
            # cluster as well; the behaviour is the same as Client.
            from snakebite.client import AutoConfigClient
            self._bite = AutoConfigClient()
        else:
            from snakebite.client import Client
            # BUG FIX: the original wrapped the Client construction in a
            # bare ``except:`` (which also traps SystemExit and
            # KeyboardInterrupt), so genuine connection errors were
            # silently retried without a version. Only the optional
            # config lookup is treated as best-effort now.
            try:
                ver = self.config.getint("hdfs", "client_version")
            except Exception:
                ver = None
            version_kwargs = {} if ver is None else {'hadoop_version': ver}
            self._bite = Client(
                self.config.get("hdfs", "namenode_host"),
                self.config.getint("hdfs", "namenode_port"),
                **version_kwargs)
    return self._bite
def test_autoconfig_client_trash_false(self, environ_get):
    """Trash stays disabled when the environment does not enable it."""
    environ_get.return_value = False
    # Point the config discovery at the HA fixture files for this test.
    core_site = self.get_config_path('ha-core-site.xml')
    hdfs_site = self.get_config_path('ha-noport-hdfs-site.xml')
    HDFSConfig.core_try_paths = (core_site,)
    HDFSConfig.hdfs_try_paths = (hdfs_site,)
    self.assertFalse(AutoConfigClient().use_trash)
def mv(src, dest, overwrite=False):
    """
    Move/rename a path on HDFS.

    src (str) : Source path on HDFS
    dest (str) : Destination path on HDFS
    overwrite (boolean) : Overwrite dest if exists
    """
    # rename2 returns a generator; drain it so the rename is executed.
    results = AutoConfigClient().rename2(src, dest, overwrite)
    list(results)
def mkdir(hdfs_path, create_parent=False, mode=0o755):
    """
    Create a directory on HDFS.

    hdfs_path (str) : Path to create
    create_parent (boolean) : Also create the parent directories
    mode (int) : Mode the directory should be created with

    Returns: list of mkdir results
    """
    # BUG FIX: the default was written as ``0755`` — Python-2-only octal
    # syntax that is a SyntaxError on Python 3; ``0o755`` is the same
    # value and is accepted by both.
    client = AutoConfigClient()
    return list(client.mkdir([hdfs_path], create_parent, mode))
def main():
    """List the HDFS root and print each entry, using a local Hadoop conf."""
    hadoop_conf_dir = "/media/d2/code-sky/dockers/hadoop/etc/hadoop"
    # AutoConfigClient discovers the namenode via $HADOOP_CONF_DIR.
    os.environ['HADOOP_CONF_DIR'] = hadoop_conf_dir
    cli = AutoConfigClient()
    target_hdfs_path = "/"
    # FIX: removed the unused ``file_dict`` local present in the original.
    for element in cli.ls([target_hdfs_path]):
        print("Result: " + str(element))
def rm(hdfs_path, recurse=False, force=False):
    """
    Delete a path on HDFS.

    hdfs_path (str or list of strings) : hdfs files to delete
    recurse (boolean) : recursively delete the folder
    force (boolean) : accepted for API compatibility but currently
        unused — snakebite's delete is already non-interactive

    Returns: list of delete results
    """
    # NOTE(review): the original docstring claimed a "mkdir result" — a
    # copy/paste slip from the sibling mkdir() helper; fixed above.
    client = AutoConfigClient()
    return list(client.delete([hdfs_path], recurse))
def get_conn(self) -> Any:
    """
    Returns a snakebite HDFSClient object.
    """
    # When using HAClient, proxy_user must be the same, so it is ok to
    # always take the first connection.
    effective_user = self.proxy_user
    autoconfig = self.autoconfig
    use_sasl = conf.get('core', 'security') == 'kerberos'
    try:
        connections = self.get_connections(self.hdfs_conn_id)
        if not effective_user:
            effective_user = connections[0].login
        if not autoconfig:
            autoconfig = connections[0].extra_dejson.get(
                'autoconfig', False)
        hdfs_namenode_principal = connections[0].extra_dejson.get(
            'hdfs_namenode_principal')
    except AirflowException:
        # No stored connection: we can still proceed, but only when
        # autoconfig was requested explicitly.
        if not autoconfig:
            raise
    if autoconfig:
        # Will read config info from the $HADOOP_HOME conf files.
        return AutoConfigClient(effective_user=effective_user,
                                use_sasl=use_sasl)
    if len(connections) == 1:
        return Client(
            connections[0].host,
            connections[0].port,
            effective_user=effective_user,
            use_sasl=use_sasl,
            hdfs_namenode_principal=hdfs_namenode_principal,
        )
    if len(connections) > 1:
        namenodes = [Namenode(conn.host, conn.port) for conn in connections]
        return HAClient(
            namenodes,
            effective_user=effective_user,
            use_sasl=use_sasl,
            hdfs_namenode_principal=hdfs_namenode_principal,
        )
    raise HDFSHookException("conn_id doesn't exist in the repository "
                            "and autoconfig is not specified")
def ls(hdfs_path, recurse=False, include_toplevel=True, include_children=False):
    """
    List a path on HDFS.

    Parameters:
    paths (list) : Paths to list
    recurse (boolean) : Recursive listing
    include_toplevel (boolean) : Include the given path in the listing.
        If the path is a file, include_toplevel is always True.
    include_children (boolean) : Include child nodes in the listing.

    Returns: (list) path listings with attributes
    """
    client = AutoConfigClient()
    listing = client.ls([hdfs_path], recurse, include_toplevel,
                        include_children)
    return LsObject(list(listing))
import urllib2, os, json, pytz, sys import datetime, calendar, pprint import argparse import snakebite import subprocess from snakebite.client import AutoConfigClient client = AutoConfigClient() # download.py # modified version of /home/zsb739/code/libs/ripe-measurement-downloader/experiment_launcher/download.py # This script downloads data from ripe atlas and stores it in the hdfs def parse_args(): parser = argparse.ArgumentParser( description='Download daily RIPE data for the provided ' 'measurement ID number') parser.add_argument( 'measurement', type=int, nargs="+", help="The integer identification number for the desired " "measurement") return parser.parse_args() def days(start, stop=None): if stop == None: curr_time = datetime.datetime.utcnow() stop_time = datetime.datetime(curr_time.year, curr_time.month,
def __init__(self):
    """Verify the hadoop binary is runnable, then build the HDFS client."""
    # Fails fast (raises) when 'hadoop' cannot be executed.
    check_output("hadoop")
    self.fs = AutoConfigClient()
def __init__(self):
    """Create the snakebite client from the local Hadoop configuration."""
    self._client = AutoConfigClient()
parser.add_argument('-t', '--title', default='Cutflow Efficiency') parser.add_argument( '-x', '--NoX', action='store_true', help='This argument suppresses showing plots via X-forwarding') parser.add_argument('-o', '--NoOutput', action='store_true', help='This argument suppresses the output of PDF plots') args = parser.parse_args() df_list = [] file_list = [] fs = AutoConfigClient() HT_eff_tot = [] MHT_eff_tot = [] BDP_eff_tot = [] NBJet_eff_tot = [] NJet_eff_tot = [] NVeto_eff_tot = [] M_sq = [] M_lsp = [] for f in fs.text([args.file + '/*/ROOTCuts_output/ROOTCuts.txt']): df = pd.read_csv(StringIO(f), delimiter=r'\s+') df_HT = df.loc[(df['HT'] > 1200.)] df_MHT = df.loc[(df['MHT'] > 200.)] df_NBJet = df.loc[(df['NBJet'] > 1)]