# Example #1 (rating: 0)
    def getHdfsDirSizeForSSH(self, hdfsDir): 
        sysConf = ConfModel.getSystemConf()
        nameNodeAccount =  sysConf.get('hadoop').get('hadoop_namenode_account')
        nameNodeHost = sysConf.get('hadoop').get('hadoop_namenode_host')

        command = "hdfs dfs -du -s " + hdfsDir
        commandRs = Process.sshCommand(nameNodeAccount, nameNodeHost, command)

        rs = {}
        if (commandRs.get('code') == 0):
            strToList = commandRs.get('stdoutPut').split('\n')
            # 过滤多余的行
            line = ""
            for curLine in strToList :
                if (len(curLine) == 0):
                    continue
                elif ( "bash" in curLine) :
                    continue
                else :
                    line = curLine
            filterList = line.split()
            rs['dir'] = filterList[2]
            rs['dataSize'] = int(filterList[0])
            rs['hdfsSize'] = int(filterList[1])
        else:
            print commandRs.get('erroutPut')
        return rs
# Example #2 (rating: 0)
    def importMysqlToHive(self):
        """Import the configured MySQL table into Hive via the
        sqoop_import_mysql.sh wrapper script.

        Returns the result of Process.runScriptSync on the built command.
        """
        # Currently selected database server.
        dbServer = self.getDbServer()

        # Source table columns, each mapped to Hive String type for the
        # script's --map_column_hive_fields argument.
        mysqlFileds = self.dbServerModel.getFileds(self.getSourceDb(),
                                                   self.getSourceTable())
        mysqlFiledsFormat = '=String,'.join(mysqlFileds) + "=String"

        # Option name/value pairs, in the exact order the script expects.
        options = [
            ('sqoop_home', self.systemConf['sqoop']['sqoop_home']),
            ('local_tmp_dir', self.systemCorePath['tmpPath']),
            ('hdfs_sqoop_tmp_dir', '/tmp/sqoop'),
            ('mysql_host', dbServer['mysql_host']),
            ('mysql_port', dbServer['mysql_port']),
            ('mysql_user', dbServer['mysql_user']),
            ('mysql_password', dbServer['mysql_password']),
            ('mysql_database', self.getSourceDb()),
            ('mysql_table', self.getSourceTable()),
            ('hive_table', self.getTargetDb() + '.' + self.getTargetTable()),
            ('fields_terminated_by', self.getFieldsTerminated()),
            ('map_column_hive_fields', mysqlFiledsFormat),
            ('mappers_num', str(self.getMapReduceNum())),
        ]

        script = self.systemCorePath['shellPath'] + '/sqoop_import_mysql.sh '
        script += ' '.join('--' + name + ' "' + value + '"'
                           for name, value in options)

        result = Process.runScriptSync(script)
        return result
# Example #3 (rating: 0)
    def importMysqlToHbase(self, querySql=None):
        """Import a MySQL table (or the result of *querySql*) into HBase
        via `sqoop import`.

        When querySql is None the whole source table is imported; otherwise
        the query result is imported using the configured HBase row key.
        Returns the result of Process.runScriptSync.

        NOTE(review): the sqoop command line is built by string
        concatenation from config values — values containing quotes or
        shell metacharacters would break or be interpreted by the shell.
        """
        # Currently selected database server.
        dbServer = self.getDbServer()

        tmpDir = self.systemCorePath['tmpPath'] + '/sqoop_outdir'
        targetDir = '/tmp/sqoop/' + self.getTargetTable()

        # Build the sqoop import command line.
        script = self.systemConf['sqoop']['sqoop_home'] + '/bin/sqoop import '
        script += '--connect \"jdbc:mysql://' + dbServer[
            'mysql_host'] + ':' + dbServer['mysql_port'] + '/' + self.getSourceDb(
            ) + '?useUnicode=true&tinyInt1isBit=false&characterEncoding=utf-8\" '
        script += '--username \"' + dbServer['mysql_user'] + '\" '
        script += '--password \"' + dbServer['mysql_password'] + '\" '
        script += '--hbase-create-table '
        script += '--hbase-table \"' + self.getTargetTable() + '\" '
        script += '--column-family \"' + self.getbaseColumnFamily() + '\" '
        script += '--m \"' + str(self.getMapReduceNum()) + '\" '
        script += '--outdir \"' + tmpDir + '\" '
        script += '--target-dir \"' + targetDir + '\" '
        script += '--delete-target-dir '

        if querySql is None:
            # Whole-table import: remove the .java stub sqoop generated for
            # this table on a previous run.
            # (Bug fix: the variable was previously assigned to rmTmpTable
            # while rmTmp was passed to runScriptSync, raising NameError.)
            rmTmp = 'rm ' + tmpDir + '/' + self.getSourceTable() + '.java'
            Process.runScriptSync(rmTmp)

            script += '--table \"' + self.getSourceTable() + '\" '
        else:
            # Query import: remove the generated QueryResult stub.
            rmTmp = 'rm ' + tmpDir + '/QueryResult.java'
            Process.runScriptSync(rmTmp)

            # Import via a free-form query, keyed/split on the HBase row key.
            script += '--hbase-row-key \"' + self.getHbaseRowKey() + '\" '
            script += '--split-by \"' + self.getHbaseRowKey() + '\" '
            script += '--query \"' + querySql + '\"'

        result = Process.runScriptSync(script)
        return result
# Example #4 (rating: 0)
    def mysqlDumpFile(self, sql, file):
        """Run *sql* through the mysql client in silent batch mode and dump
        the rows, piped through sed, into *file*.

        The sed rules strip embedded newlines, convert literal NULL to the
        Hive null marker, and replace tab column separators with \001.
        """
        # sed formatting rules, applied in order (kept byte-identical —
        # the escaping here is shell/sed-sensitive).
        sedRules = ''.join([
            's/[\\n|\\r\\n]//g;',   # drop newline characters inside a row
            's/NULL/\\\\\N/g;',     # literal NULL -> backslash-N null marker
            's/\t/\001/g;',         # tab column separator -> \001
        ])

        # Assemble: mysql -N -s -e "<sql>" | sed -e "<rules>" > <file>
        script = (self.getMysqlCommand()
                  + ' -N -s -e "' + sql
                  + '" | sed -e "' + sedRules
                  + '" > ' + file)

        result = Process.runScriptSync(script)
        return result
# Example #5 (rating: 0)
    def hiveBinInterface(self):
        #process = Process()
        #result =  process.runScript('ls ~/develop/jason/dw_etl/dw_service ')
        #print result['stdoutPut']

        #list = ['ls ~/develop/jason/dw_etl/dw_service','ls ~/develop/jason/uba']
        #list =  ['hadoop dfs -ls /user/hive','hadoop dfs -ls /user']
        list = [
            'hive -e "select count(*) from dw_db.dw_broker_summary_basis_info_daily;"',
            'hive -e "select count(*) from dw_db.dw_cal;"'
        ]
        result = Process.runThreadingScripts(list)

        status = True
        while (status):
            sleep(1)
            for item in result:
                print item
                u'获取当前活动的(alive)线程的个数'
                print item.isAlive()
# Example #6 (rating: 0)
 def runHiveScript(self, sql):
     """Execute *sql* through the hive CLI and return the process result."""
     command = '%s/bin/hive -e "%s"' % (self.hiveConf['hiveHome'], sql)
     return Process.runScriptSync(command)
# Example #7 (rating: 0)
 def runMysqlCommand(self, sql=''):
     """Run *sql* with the mysql client in silent batch (-N -s) mode."""
     command = '%s -N -s -e "%s"' % (self.getMysqlCommand(), sql)
     return Process.runScriptSync(command)
# Example #8 (rating: 0)
 def hadoopCommand(self, command):
     """Run an arbitrary hadoop shell *command* synchronously."""
     result = Process.runScriptSync(command)
     return result