Пример #1
0
    def processApp(self, appCode, hiveClient):
        """Pack the Hive partition for ``self.date`` of each selected event.

        The processed events are ``self.event`` alone when it is set,
        otherwise every event returned by the app config. For each event the
        method makes sure the table/partition metadata rows exist, runs
        ``PACK_TABLE_QUERY`` through ``hiveClient`` when the partition is not
        flagged compact (or when ``self.skipCheckInDB`` is set) and, if the
        query succeeded, stores and commits the compact flag.

        :param appCode: code of the application to process.
        :param hiveClient: object whose ``execute`` method runs Hive statements.
        :return: None. Returns early when the Hive table metadata row cannot
            be obtained.
        """
        appConfig = self.getAppConfig(appCode)
        if self.event:
            eventCodes = [self.event]
        else:
            eventCodes = [appEvent.code for appEvent in appConfig.getEvents()]
        dbSession = self.getDBSession()
        hiveMetaService = HiveMetaService(dbSession)
        total = len(eventCodes)
        # The app lookup does not depend on the event, so do it once.
        app = self.getApp(appCode)

        for position, eventCode in enumerate(eventCodes, 1):
            print('\n' + 'start pack key {}.{} for date {} ({}/{})'.format(appCode, eventCode, self.date, position, total))
            if not app:
                print('cant find app {} in database'.format(appCode))
                continue

            # Get the table metadata row.
            hiveTable = hiveMetaService.getOrCreateHiveTable(app.appId, eventCode)
            if not hiveTable:
                print('Cannot create new hiveTable. Terminate')
                return

            # Does the partition physically exist?
            if not self.HDFSClient.isPartitionExist(appCode, eventCode, self.date):
                print('folder for partition not exist. Next event')
                continue

            # Get the table partition metadata row.
            hiveTablePartition = hiveMetaService.getOrCreateHiveTablePartition(hiveTable.hiveTableId, self.date)
            if not hiveTablePartition:
                # NOTE(review): the message says "Terminate" but processing
                # moves on to the next event - confirm which one is intended.
                print('Cannot create new hiveTablePartition. Terminate')
                continue

            # Skip partitions already flagged compact unless the check is disabled.
            if hiveTablePartition.isCompact and not self.skipCheckInDB:
                print('table {}.{} already packed'.format(appCode, eventCode))
                continue

            print('Start pack table {}.{}'.format(appCode, eventCode))
            try:
                start = datetime.now()
                query = PACK_TABLE_QUERY.format(eventCode, '%(year)d-%(month)02d-%(day)02d' % {'year': self.year, 'month': self.month, 'day': self.day})
                hiveClient.execute('USE {}'.format(self.getDBName(appCode)))
                print(query)
                hiveClient.execute(query)
                end = datetime.now()
                print('Pack complete. Query time: {}'.format(end - start))
                time.sleep(10)
            except Exception as ex:
                # ``message`` is empty for multi-argument exceptions and absent
                # on Python 3; fall back to the exception itself so the cause
                # is never lost from the log.
                print('Pack end with exception {}'.format(getattr(ex, 'message', None) or ex))
            else:
                hiveTablePartition.isCompact = 1
                print('Set compact label to in partition meta')

                dbSession.add(hiveTablePartition)
                dbSession.commit()
Пример #2
0
class InitHiveMetaDataScript(BaseAnalyticsScript):
    """Create Hive table/partition metadata rows for every app and event.

    For each event of each app the script gets or creates the Hive table
    metadata row, gets or creates a partition row for every partition date
    reported by the WebHDFS client, and stores the earliest partition date in
    the table's ``startFrom`` field.
    """

    def run(self):
        """Build the metadata service and process every app code."""
        print('run InitHiveMetaDataScript')
        appCodes = self.getAppCodes()
        self.hiveMetaService = HiveMetaService(self.getDBSession())

        for appCode in appCodes:
            appConfig = self.getAppConfig(appCode)
            self.processApp(appConfig)

    def processApp(self, appConfig):
        """Process every event of the app described by ``appConfig``.

        :param appConfig: app config object providing ``getAppCode()`` and
            ``getEvents()``.
        """
        appCode = appConfig.getAppCode()
        app = self.getApp(appCode)
        if not app:
            print('Cant find app with code {}. Terminate.'.format(appCode))
            self.terminate()
            # terminate() is defined outside this class; if it ever returns,
            # do not fall through and dereference the missing app below.
            return
        print('Process app {}'.format(appCode))

        for appEvent in appConfig.getEvents():
            hiveTable = self.hiveMetaService.getOrCreateHiveTable(app.appId, appEvent.code)
            if not hiveTable:
                print('Cant get or create HiveTable for {}, {}'.format(appCode, appEvent.code))
                continue
            self.processHiveTable(hiveTable, appCode, appEvent.code)

    def processHiveTable(self, hiveTable, appCode, eventCode):
        """Register the partitions of one table and store its start date.

        :param hiveTable: Hive table metadata row; its ``hiveTableId`` is read
            and its ``startFrom`` is written.
        :param appCode: application code used to look up partitions.
        :param eventCode: event code used to look up partitions.
        """
        print('processHiveTable for {}, {}'.format(appCode, eventCode))
        dbSession = self.getDBSession()
        analyticsWebHDFS = self.getWebHDFSClient()
        try:
            partitionsDates = analyticsWebHDFS.getPartitions(appCode, eventCode)
        except WebHDFSException as e:
            # Prefer ``message`` when it carries text, otherwise print the
            # exception itself so the cause is never lost from the log.
            print('Exception on getPartitions: {}'.format(getattr(e, 'message', None) or e))
            return

        for partitionDate in partitionsDates:
            hivePartition = self.hiveMetaService.getOrCreateHiveTablePartition(hiveTable.hiveTableId, partitionDate)
            if not hivePartition:
                # Report the requested date: hivePartition is falsy here, so
                # it cannot be asked for its partitionDate.
                print('Cant get or create partition for {} date {}'.format(eventCode, partitionDate))

        if partitionsDates:
            minDate = min(partitionsDates)
            hiveTable.startFrom = minDate
            dbSession.add(hiveTable)
            dbSession.commit()
            print('Set start from {} {} {}'.format(appCode, eventCode, minDate))