Example #1
import os

import sentry_sdk
from loguru import logger  # assumption: logger.success in Example #4 suggests loguru


class Config:
    env = os.environ

    if not env.get("SENTRY_DSN"):
        logger.warning("SENTRY_DSN is not set")
    else:
        sentry_sdk.init(env.get("SENTRY_DSN"))

    if not env.get("BOUNDARY"):
        logger.warning("BOUNDARY will be set to 65")
        BOUNDARY = 65
    else:
        # keep BOUNDARY numeric; environment values are strings
        BOUNDARY = int(env.get("BOUNDARY"))

    if not env.get("DEVICE_ENV"):
        logger.warning("DEVICE_ENV will be set to an empty string")
        DEVICE_ENV = ''
    else:
        DEVICE_ENV = env.get("DEVICE_ENV")

    if not env.get("DATA_FOLDER"):
        # logger.error, not logger.exception: there is no active exception here
        logger.error("DATA_FOLDER is not set")
    else:
        DIRECTORY = env.get("DATA_FOLDER")
        if not env.get("ANALYSIS_FOLDER"):
            logger.error("ANALYSIS_FOLDER is not set")
        else:
            # class-level handle; stays open for the lifetime of the process
            ANALYSIS = open(f"{env.get('ANALYSIS_FOLDER')}/analysis.txt", 'w')

    # normalize once so unset variables never trigger AttributeError on .upper()
    _inspect_data = (env.get("INSPECT_DATA") or "").upper()
    _inspect_dropped = (env.get("INSPECT_DROPPED") or "").upper()

    if not _inspect_data and not _inspect_dropped:
        INSPECT_DROPPED = True
        INSPECT_DATA = True
        logger.warning("The application will run all features\n")
    elif _inspect_data in ("", "FALSE") and _inspect_dropped == "TRUE":
        INSPECT_DROPPED = True
        INSPECT_DATA = False
        logger.info("The application will inspect dropped updates\n")
    elif _inspect_data == "TRUE" and _inspect_dropped in ("", "FALSE"):
        INSPECT_DATA = True
        INSPECT_DROPPED = False
        logger.info("The application will inspect data\n")
    elif _inspect_data == "TRUE" and _inspect_dropped == "TRUE":
        INSPECT_DATA = True
        INSPECT_DROPPED = True
        logger.info("The application will run all features\n")
    else:
        logger.error(
            f"Invalid combination of INSPECT_DATA and INSPECT_DROPPED\n"
            f"INSPECT_DATA: {env.get('INSPECT_DATA')}\n"
            f"INSPECT_DROPPED: {env.get('INSPECT_DROPPED')}\n"
            f"The application requires at least one of the features to be set"
            f" and to be True")
Example #2
def station_count(self):
    """
    Return the number of stations seen per hour
    :return: list of station counts, one entry per hour
    """
    stations = [0] * 24
    for hour in self.data_file.buckets:
        for pt in self.data_file.buckets[hour]:
            try:
                pt_data = json.loads(pt['data'])
                if 'stations' in pt_data:
                    # the last data point in the hour wins
                    stations[hour] = len(pt_data['stations'])
            except ValueError:
                logger.error(f"Invalid json string: {pt['data']}")
                raise  # re-raise the original error instead of a fresh ValueError
    logger.info(f"stations: {stations}")
    return stations
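
The methods in these examples all walk the same structure. A minimal sketch of what data_file.buckets is assumed to look like, inferred from how the snippets index it (the actual construction in initialize_buckets is not shown):

# hypothetical shape, inferred from usage: hour-of-day -> ordered data points
buckets = {
    0: [
        {'timestamp': 1600000000, 'type': 'regular', 'data': '{"stations": []}'},
        {'timestamp': 1600000060, 'type': 'slow', 'data': '{"neighbors": []}'},
    ],
    # ... one list per hour, keys 0 through 23
}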
Example #3
def get_device_type(self):
    """
    Return the device type for each raw data file
    :return: dict mapping the device id to its detected type ("Unknown" if never detected)
    """
    device_id = os.path.basename(self.data_file.file).partition("-")[0].replace('_', ':')
    device_type = {device_id: "Unknown"}
    for pt in self.data_file.sorted_data:
        if pt['type'] == 'slow':
            # call detect_device_type once and reuse the result
            detected = device_detection.detect_device_type(device_id, Config.DEVICE_ENV, pt['data'])
            if detected is not None:
                device_type[device_id] = detected
                break
    logger.info(f"{device_type}")
    return device_type
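
The device id comes from the file name: everything before the first "-", with underscores turned back into colons. A quick illustration with a made-up file name:

# hypothetical file name, for illustration only
path = "data/a4_c3_f0_12_34_56-20240101.jsonl"
os.path.basename(path).partition("-")[0].replace('_', ':')
# -> 'a4:c3:f0:12:34:56'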
Example #4
def main():
    try:
        for file_path in glob.glob(f"{Config.DIRECTORY}/*.jsonl"):
            Config.ANALYSIS.write(file_path + "\n\n")
            logger.info(f"filename: {file_path}")
            sorted_data = read_data(file_path)
            hours_bucket = initialize_buckets(sorted_data)
            data_file = DataFile(file_path, sorted_data, hours_bucket)
            if Config.INSPECT_DROPPED:
                drop = inspect_dropped.Dropped(data_file)
                drop.run()
            if Config.INSPECT_DATA:
                # avoid shadowing the stdlib module name "inspect"
                inspector = inspect_data.Data(data_file)
                inspector.run()
            logger.success(f"file {file_path} completed\n")
    except Exception:
        capture_exception()
        logger.exception("Could not finish reading files")
Example #5
def avg_upd_not_dropped(self):
    """
    Calculate the average time difference between consecutive updates that are not dropped
    :return: average gap in seconds, or 1.0 if every update was dropped
    """
    sum_upd = 0
    count = 0
    for hour in self.data_file.buckets:
        bucket = self.data_file.buckets[hour]
        for earlier, later in zip(bucket, bucket[1:]):
            if not is_dropped(later['timestamp'], earlier['timestamp']):
                sum_upd += later['timestamp'] - earlier['timestamp']
                count += 1
    if count == 0:
        logger.error("every update is dropped")
        return 1.0  # sentinel: no qualifying pairs to average
    average = round(sum_upd / count, 2)
    logger.info(f"average update duration if not dropped: {average}s")
    return average
Example #6
def avg_upd_dropped(self):
    """
    Calculate the average time difference between consecutive dropped updates
    :return: average gap in seconds, or 1.0 if no update was dropped
    """
    sum_upd = 0
    count = 0
    for hour in self.data_file.buckets:
        bucket = self.data_file.buckets[hour]
        for earlier, later in zip(bucket, bucket[1:]):
            if is_dropped(later['timestamp'], earlier['timestamp']):
                sum_upd += later['timestamp'] - earlier['timestamp']
                count += 1
    if count == 0:
        logger.info("no dropped updates")
        return 1.0  # same sentinel as avg_upd_not_dropped, for consistency
    average = round(sum_upd / count, 2)
    logger.info(f"average update duration if dropped: {average}s")
    return average  # reuse the value computed above instead of recomputing
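
Both averages lean on an is_dropped helper that the snippets never show. A hedged sketch of what it plausibly does, assuming a gap larger than Config.BOUNDARY seconds marks a dropped update; this is inferred from how BOUNDARY is used in Example #7, not the actual implementation:

def is_dropped(later_ts, earlier_ts):
    # hypothetical: an update counts as dropped when the gap between
    # consecutive timestamps exceeds the configured BOUNDARY (seconds)
    return (later_ts - earlier_ts) > Config.BOUNDARY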
Example #7
def missing_reg(self):
    """
    Write the estimated number of possible missing regular updates to the analysis file
    :return: dictionary, consecutive reg upd tuple as keys and time diff-hour tuple as values
    """
    keys = []
    values = []
    count = [0] * 24

    for hour in self.data_file.buckets:
        bucket = self.data_file.buckets[hour]
        # start at 1: the first point of an hour has no in-bucket predecessor,
        # and bucket[i - 1] would otherwise wrap around to the last point
        for i in range(1, len(bucket)):
            data_pt = bucket[i]
            if data_pt['type'] == 'slow':
                time_before = bucket[i - 1]['timestamp']
                time_slow = data_pt['timestamp']
                if i != len(bucket) - 1:
                    time_after = bucket[i + 1]['timestamp']
                    missing_reg_interval(keys, values, time_before, time_after, hour)
                else:
                    missing_reg_interval(keys, values, time_before, time_slow, hour)
                if (time_slow - time_before) / float(Config.BOUNDARY) > 1:
                    count[hour] += round((time_slow - time_before) / float(Config.BOUNDARY))
    missing_regular = dict(zip(keys, values))

    logger.info(f"missing regular due to slow updates per hour: {count}")
    logger.info(f"missing regular due to slow updates: {missing_regular}")
    logger.info(f"total missing regular due to slow updates: {sum(count)}")
    Config.ANALYSIS.write("\n")
    return missing_regular
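
missing_reg_interval is another helper the snippets do not include. A sketch of the shape its call sites imply: it appends a (start, end) key and a (gap, hour) value whenever the window around a slow update is wide enough to have swallowed regular updates. This is an assumption built from the docstring and the parallel count logic above, not the real helper:

def missing_reg_interval(keys, values, start_ts, end_ts, hour):
    # hypothetical reconstruction: record intervals wide enough to hide
    # at least one regular update (same BOUNDARY test as the count above)
    gap = end_ts - start_ts
    if gap / float(Config.BOUNDARY) > 1:
        keys.append((start_ts, end_ts))
        values.append((gap, hour))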
Example #8
def latest_dr_ver(self):
    """
    Return the latest driver version per hour
    :return: list of driver versions, one entry per hour (None if never reported)
    """
    dr_ver = [None] * 24
    no_dr_ver = []
    for hour in self.data_file.buckets:
        for pt in self.data_file.buckets[hour]:
            try:
                pt_data = json.loads(pt['data'])
                if 'dr_ver' in pt_data:
                    dr_ver[hour] = pt_data['dr_ver']
            except ValueError:
                logger.error(f"Invalid json string: {pt['data']}")
                raise  # keep the original traceback
        if dr_ver[hour] is None:
            no_dr_ver.append(hour)
    if no_dr_ver:
        logger.info(f"no driver version in hours: {no_dr_ver}")
    logger.info(f"driver version: {dr_ver}")
    return dr_ver
Example #9
def neighbor_count(self):
    """
    Return the number of neighbors per hour
    :return: list of neighbor counts, one entry per hour
    """
    neighbors = [0] * 24
    prev = 0
    for hour in self.data_file.buckets:
        for pt in self.data_file.buckets[hour]:
            if pt['type'] == "slow":
                try:
                    pt_data = json.loads(pt['data'])
                    neighbors_count = len(pt_data['neighbors'])
                    neighbors[hour] = neighbors_count
                    prev = neighbors_count
                except ValueError:
                    logger.error(f"Invalid json string: {pt['data']}")
                    raise  # keep the original traceback
                except KeyError:
                    logger.error(f"slow update at {pt['timestamp']} does not have neighbors")
        if neighbors[hour] == 0:
            # carry the last known count into hours with no (or empty) neighbor data;
            # note this also overwrites a genuine count of zero
            neighbors[hour] = prev
    logger.info(f"neighbors: {neighbors}")
    return neighbors
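
station_count, latest_dr_ver, and neighbor_count all return 24-slot, hour-indexed lists, so a caller can line them up directly. A small sketch of that, assuming an analyzer object exposing the three methods (the report format is invented for illustration):

# hypothetical caller
stations = analyzer.station_count()
versions = analyzer.latest_dr_ver()
neighbors = analyzer.neighbor_count()

for hour in range(24):
    print(f"{hour:02d}:00  stations={stations[hour]:>3}  "
          f"neighbors={neighbors[hour]:>3}  dr_ver={versions[hour]}")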