Example #1
    def innerLoopActivity(self, item):
        """
        This is where the actual ETL activity is called for the given compare item.
        """
        # Write unit data to the target repository:
        if self.itemCount == 0:
            config.createUnitDataAccessor(self.storageTgt).store(self.unitData)

        # Put together the site file:
        self._insertSiteFile(item, item.provItem.payload)

        # Obtain the raw count data archive:
        countsFilePath = self.gsProvider.resolvePayload(item)
        if not countsFilePath:
            return 0

        # Write raw count data archive to storage:
        print("%s -> %s" % (item.label, self.storageTgt.repository))
        catalogElement = self.storageTgt.createCatalogElement(
            item.identifier.base, item.identifier.ext, item.identifier.date,
            self.processingDate)
        self.storageTgt.writeFile(countsFilePath,
                                  catalogElement,
                                  cacheCatalogFlag=True)

        # Clean up:
        os.remove(countsFilePath)

        # Performance metrics:
        self.perfmet.recordCollect(item.identifier.date, representsDay=True)

        return 1
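
The method returns 1 when an item is archived and 0 when no raw count archive could be resolved. Below is a minimal sketch (hypothetical driver and iterator names, not the project's actual doCompareLoop) of how a compare loop could total those return values while maintaining the self.itemCount that this example checks:

    # Minimal sketch only (hypothetical, not the project's actual driver): a
    # compare loop that sums the 0/1 results of innerLoopActivity().
    def doCompareLoopSketch(self, provSrc, provTgt):
        count = 0
        self.itemCount = 0
        for item in provSrc.compare(provTgt):  # compare() iterator is assumed
            count += self.innerLoopActivity(item)  # 1 = processed, 0 = skipped
            self.itemCount += 1
        return count
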
Example #2
    def innerLoopActivity(self, item):
        """
        This is where the actual ETL activity is called for the given compare item.
        """
        # Write unit data to the target repository:
        if self.itemCount == 0:
            config.createUnitDataAccessor(self.storageTgt).store(self.unitData)
            
        # Read in the file and call the transformation code.
        print("%s: %s -> %s" % (item.label, self.storageSrc.repository, self.storageTgt.repository))
        filepathSrc = self.storageSrc.retrieveFilePath(item.label)
        outJSON, perfWork = wtStandardize(item, filepathSrc,
            self.storageTgt.makeFilename(item.identifier.base, "json", item.identifier.date), self.processingDate)

        # Clean up:
        os.remove(filepathSrc)
        
        # Prepare for writing to the target:
        catalogElement = self.storageTgt.createCatalogElement(item.identifier.base, "json",
                                                              item.identifier.date, self.processingDate)
        self.storageTgt.writeJSON(outJSON, catalogElement)
            
        # Final stages:
        self.perfmet.recordCollect(item.identifier.date, representsDay=True)
        for sensor, rec in perfWork.items():
            self.perfmet.recordSensorObs(
                sensor, "Vehicle Counts",
                perfmet.SensorObs(observation=rec[0],
                                  expected=None,
                                  collectionDate=item.identifier.date,
                                  minTimestamp=rec[1],
                                  maxTimestamp=rec[2]))
        self.perfmet.writeSensorObs()

        return 1
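
The loop above unpacks each perfWork record positionally. Purely as an illustration of what that implies, perfWork is assumed to map a sensor name to a (count, minTimestamp, maxTimestamp) tuple, roughly like this:

    # Illustrative only: the perfWork layout implied by the unpacking above.
    # (String timestamps are shown for brevity; the real values may well be
    # datetime objects.)
    perfWork = {
        "sensor_A": (1440, "2019-05-01 00:00", "2019-05-01 23:45"),
        # value layout: (observation count, minTimestamp, maxTimestamp)
    }
    for sensor, rec in perfWork.items():
        observation, minTimestamp, maxTimestamp = rec
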
Example #3
    def innerLoopActivity(self, item):
        """
        This is where the actual ETL activity is called for the given compare item.
        """
        # Commit old sensor observations:
        if self.prevDate and item.identifier.date > self.prevDate:
            # Write out uncommitted sensor performance metric observations for the prior date:
            self.perfmet.writeSensorObs()

        # Get site file:
        siteFileCatElem, newSiteFlag = self.siteFileCatElems.getForPrevDate(
            item.identifier.base, item.identifier.date, forceValid=True)
        if not siteFileCatElem:
            print("ERROR: No site file is found for '%s' for date %s." %
                  (item.identifier.base, str(item.identifier.date)))
            return 0
        if not newSiteFlag:
            siteFile = self.siteFileCache[item.identifier.base]
        else:
            # Get site file from repository if needed:
            siteFile = json.loads(
                self.storageSrc.retrieveBuffer(siteFileCatElem["pointer"]))
            self.siteFileCache[item.identifier.base] = siteFile

        # Obtain unit data, and write it to the target repository if it's new:
        unitData = self.unitDataProv.retrieve(item.identifier.date)
        if unitData != self.prevUnitData:
            config.createUnitDataAccessor(self.storageTgt).store(unitData)

        print("%s: %s -> %s" % (item.label, self.storageSrc.repository,
                                self.storageTgt.repository))
        worker = GSJSONStandard(item, siteFile, self.storageSrc,
                                self.storageTgt, self.processingDate)
        if not worker.jsonize():
            return 0

        # Write the site file if it is a new one:
        if newSiteFlag:
            catalogElement = self.storageTgt.createCatalogElement(
                item.identifier.base, "site.json",
                siteFileCatElem["collection_date"], self.processingDate)
            self.storageTgt.writeJSON(siteFile, catalogElement)

        # Performance metrics logging:
        self.perfmet.recordCollect(item.identifier.date, representsDay=True)
        self.perfmet.recordSensorObs(
            item.identifier.base, "Vehicle Counts",
            perfmet.SensorObs(observation=worker.perfWork[0],
                              expected=None,
                              collectionDate=item.identifier.date,
                              minTimestamp=worker.perfWork[1],
                              maxTimestamp=worker.perfWork[2]))

        return 1
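
Example #3 fetches each device's site file from the source repository only when the catalog reports a new one and otherwise reuses the in-memory copy. A minimal sketch of just that cache step, assuming nothing beyond what the example itself shows (a catalog element with a "pointer" key and storageSrc.retrieveBuffer()):

    # Minimal sketch of the site-file cache step used above (assumes json is
    # imported, as in the example).
    def getCachedSiteFile(self, base, siteFileCatElem, newSiteFlag):
        if not newSiteFlag:
            # Already read for this base on an earlier item; reuse it.
            return self.siteFileCache[base]
        siteFile = json.loads(self.storageSrc.retrieveBuffer(siteFileCatElem["pointer"]))
        self.siteFileCache[base] = siteFile
        return siteFile
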
Example #4
    def innerLoopActivity(self, item):
        """
        This is where the actual ETL activity is called for the given compare item.
        """
        if item.identifier.ext not in ("traf_match_summary.txt", "matched.txt",
                                       "unmatched.txt"):
            print("WARNING: Unsupported file type or extension: %s" %
                  item.identifier.ext)
            return 0

        # Write unit data to the target repository:
        if self.itemCount == 0:
            config.createUnitDataAccessor(self.storageTgt).store(self.unitData)

        # Read in the file and call the transformation code.
        print("%s: %s -> %s" % (item.label, self.storageSrc.repository,
                                self.storageTgt.repository))
        filepathSrc = self.storageSrc.retrieveFilePath(item.label)
        # Get string up to the file type extension:
        fileType = item.identifier.ext.split(".")[0]
        outJSON, perfWork = btStandardize(
            item, filepathSrc,
            self.storageTgt.makeFilename(item.identifier.base,
                                         fileType + ".json",
                                         item.identifier.date), fileType,
            self.processingDate)

        # Clean up:
        os.remove(filepathSrc)

        # Prepare for writing to the target:
        catalogElement = self.storageTgt.createCatalogElement(
            item.identifier.base, fileType + ".json", item.identifier.date,
            self.processingDate)
        self.storageTgt.writeJSON(outJSON, catalogElement)

        # Final stages:
        self.perfmet.recordCollect(item.identifier.date, representsDay=True)
        if fileType == "unmatched":
            for sensor, rec in perfWork.items():
                self.perfmet.recordSensorObs(
                    sensor, "Unmatched Entries",
                    perfmet.SensorObs(observation=rec[0],
                                      expected=None,
                                      collectionDate=item.identifier.date,
                                      minTimestamp=rec[1],
                                      maxTimestamp=rec[2]))
            # TODO: One issue we have with this is that there isn't a definitive way to know if a BT sensor is dead
            # without it totally missing from the list. We need to add in zero-entries for sensors we expect to be working.
            # Or, calculate expectations using prior records.
            self.perfmet.writeSensorObs()

        return 1
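
Because item.identifier.ext is one of the three full file names checked at the top, ext.split(".")[0] simply strips the ".txt" suffix before it is reused in the target JSON filename; a quick illustration:

    # Illustration of the fileType derivation used above:
    for ext in ("traf_match_summary.txt", "matched.txt", "unmatched.txt"):
        fileType = ext.split(".")[0]
        print("%s -> %s" % (ext, fileType + ".json"))
    # traf_match_summary.txt -> traf_match_summary.json
    # matched.txt            -> matched.json
    # unmatched.txt          -> unmatched.json
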
Example #5
    def etlActivity(self):
        """
        This performs the main ETL processing.
        
        @return count: A general number of records processed
        """
        # First, get the unit data for GRIDSMART:
        self.unitDataProv = config.createUnitDataAccessor(
            self.storageSrc).prepare(self.startDate, self.endDate)

        # Prepare to get site files:
        self.siteFileCatElems = self.storageSrc.catalog.getSearchableQueryDict(
            self.storageSrc.repository,
            base=None,
            ext="site.json",
            earlyDate=self.startDate,
            lateDate=self.endDate)

        # Configure the source and target repositories and start the compare loop:
        self.bases.clear()
        self.curDate = None
        count = self.doCompareLoop(
            last_update.LastUpdStorageCatProv(self.storageSrc,
                                              extFilter="%%.json"),
            last_update.LastUpdStorageCatProv(self.storageTgt),
            baseExtKey=False)
        # Process the last day's worth of records:
        count += self._processDay(self.curDate)

        print("Records processed: %d" % count)
        return count
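
This variant clears self.curDate before the loop and calls self._processDay(self.curDate) once more afterward, which suggests the inner loop accumulates items per collection day and the final, still-open day must be flushed explicitly. A heavily hedged sketch of that pattern (hypothetical body, not taken from the project):

    # Hypothetical sketch of the per-day batching implied by curDate/_processDay:
    def innerLoopActivitySketch(self, item):
        if self.curDate and item.identifier.date > self.curDate:
            self._processDay(self.curDate)   # flush the day that just completed
        self.curDate = item.identifier.date  # keep accumulating the current day
        # ... per-item work for the current day would go here ...
        return 1
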
Example #6
    def etlActivity(self):
        """
        This performs the main ETL processing.

        @return count: A general number of records processed
        """
        # First, get the unit data for Wavetronix:
        unitDataProv = config.createUnitDataAccessor(self.dataSource)
        self.unitData = unitDataProv.retrieve()

        # Configure the source and target repositories and start the compare loop:
        count = self.doCompareLoop(last_update.LastUpdStorageCatProv(self.storageSrc),
                                   last_update.LastUpdStorageCatProv(self.storageTgt),
                                   baseExtKey=False)
        print("Records processed: %d" % count)
        return count
Example #7
    def etlActivity(self):
        """
        This performs the main ETL processing.
        
        @return count: A general number of records processed
        """
        # First, get the unit data for GRIDSMART:
        unitDataProv = config.createUnitDataAccessor(self.dataSource)
        self.unitData = unitDataProv.retrieve()
        deviceLogreaders = gs_support.getDevicesLogreaders(
            self.unitData, self.deviceFilter)

        # Configure the source and target repositories and start the compare loop:
        self.gsProvider = last_upd_gs.LastUpdGSProv(deviceLogreaders,
                                                    self.tempDir)
        count = self.doCompareLoop(self.gsProvider,
                                   last_update.LastUpdStorageCatProv(
                                       self.storageTgt),
                                   baseExtKey=False)
        print("Records processed: %d" % count)
        return count
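
All three etlActivity variants follow the same outline: build a unit-data accessor, wire up the source and target providers, run doCompareLoop(), and report the count. A small usage sketch, with a hypothetical concrete class name since none of the examples show one:

# Hypothetical usage sketch; GSCountsETLApp is an assumed class name exposing
# the etlActivity() shown in Example #7.
def main():
    app = GSCountsETLApp()
    processed = app.etlActivity()
    print("ETL finished; %d records processed." % processed)
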